Do not resolve entities when parsing XML

Resolving entities is dangerous since lxml will actually read file:// URLs in entity definitions. Fixes #1857800 [Private bug](https://bugs.launchpad.net/calibre/+bug/1857800)
2025-07-09 03:04:10 -04:00 · 2019-12-29 18:01:43 +05:30 · 2019-12-29 18:01:43 +05:30 · 68febe94ca
commit 68febe94ca
parent 589079c6aa
51 changed files with 166 additions and 164 deletions
--- a/src/calibre/devices/prs505/sony_cache.py
+++ b/src/calibre/devices/prs505/sony_cache.py
@ -92,7 +92,7 @@ def uuid():
 class XMLCache(object):
    def __init__(self, paths, ext_paths, prefixes, use_author_sort):
-        from lxml import etree
+        from calibre.utils.xml_parse import safe_xml_fromstring
        if DEBUG:
            debug_print('Building XMLCache...', paths)
@ -101,7 +101,6 @@ class XMLCache(object):
        self.use_author_sort = use_author_sort
        # Parse XML files {{{
        parser = etree.XMLParser(recover=True)
        self.roots = {}
        for source_id, path in paths.items():
            if source_id == 0:
@ -116,10 +115,9 @@ class XMLCache(object):
                    with lopen(path, 'rb') as f:
                        raw = f.read()
-            self.roots[source_id] = etree.fromstring(xml_to_unicode(
+            self.roots[source_id] = safe_xml_fromstring(
-                        raw, strip_encoding_pats=True, assume_utf8=True,
+                xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True, verbose=DEBUG)[0]
-                        verbose=DEBUG)[0],
+            )
                        parser=parser)
            if self.roots[source_id] is None:
                raise Exception(('The SONY database at %r is corrupted. Try '
                        ' disconnecting and reconnecting your reader.')%path)
@ -136,10 +134,9 @@ class XMLCache(object):
            if os.access(path, os.W_OK):
                try:
                    with lopen(path, 'rb') as f:
-                        self.ext_roots[source_id] = etree.fromstring(
+                        self.ext_roots[source_id] = safe_xml_fromstring(
-                                xml_to_unicode(f.read(),
+                            xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True, verbose=DEBUG)[0]
-                                    strip_encoding_pats=True, assume_utf8=True,
+                        )
                                    verbose=DEBUG)[0], parser=parser)
                        self.ext_paths[source_id] = path
                except:
                    pass
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@ -51,9 +51,9 @@ def return_raster_image(path):
 def extract_cover_from_embedded_svg(html, base, log):
    from lxml import etree
    from calibre.ebooks.oeb.base import XPath, SVG, XLINK
-    root = etree.fromstring(html)
+    from calibre.utils.xml_parse import safe_xml_fromstring
    root = safe_xml_fromstring(html)
    svg = XPath('//svg:svg')(root)
    if len(svg) == 1 and len(svg[0]) == 1 and svg[0][0].tag == SVG('image'):
--- a/src/calibre/ebooks/conversion/plugins/epub_input.py
+++ b/src/calibre/ebooks/conversion/plugins/epub_input.py
@ -231,7 +231,7 @@ class EPUBInput(InputFormatPlugin):
        return removed
    def find_opf(self):
-        from lxml import etree
+        from calibre.utils.xml_parse import safe_xml_fromstring
        def attr(n, attr):
            for k, v in n.attrib.items():
@ -239,7 +239,7 @@ class EPUBInput(InputFormatPlugin):
                    return v
        try:
            with lopen('META-INF/container.xml', 'rb') as f:
-                root = etree.fromstring(f.read())
+                root = safe_xml_fromstring(f.read())
                for r in root.xpath('//*[local-name()="rootfile"]'):
                    if attr(r, 'media-type') != "application/oebps-package+xml":
                        continue
@ -356,12 +356,13 @@ class EPUBInput(InputFormatPlugin):
        from calibre.ebooks.oeb.polish.parsing import parse
        from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
        from calibre.ebooks.oeb.polish.toc import first_child
        from calibre.utils.xml_parse import safe_xml_fromstring
        from tempfile import NamedTemporaryFile
        with lopen(nav_path, 'rb') as f:
            raw = f.read()
        raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
        root = parse(raw, log=log)
-        ncx = etree.fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>')
+        ncx = safe_xml_fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>')
        navmap = ncx[0]
        et = '{%s}type' % EPUB_NS
        bn = os.path.basename(nav_path)
--- a/src/calibre/ebooks/conversion/plugins/fb2_input.py
+++ b/src/calibre/ebooks/conversion/plugins/fb2_input.py
@ -39,10 +39,11 @@ class FB2Input(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from lxml import etree
        from calibre.utils.xml_parse import safe_xml_fromstring
        from calibre.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
-        from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
+        from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS
        from calibre.ebooks.chardet import xml_to_unicode
        self.log = log
        log.debug('Parsing XML...')
@ -51,15 +52,9 @@ class FB2Input(InputFormatPlugin):
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
            assume_utf8=True, resolve_entities=True)[0]
        try:
-            doc = etree.fromstring(raw)
+            doc = safe_xml_fromstring(raw)
        except etree.XMLSyntaxError:
-            try:
+            doc = safe_xml_fromstring(raw.replace('& ', '&amp;'))
                doc = etree.fromstring(raw, parser=RECOVER_PARSER)
                if doc is None:
                    raise Exception('parse failed')
            except:
                doc = etree.fromstring(raw.replace('& ', '&amp;'),
                        parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
        doc = ensure_namespace(doc)
@ -99,7 +94,7 @@ class FB2Input(InputFormatPlugin):
            ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                    re.DOTALL).sub('', ss)
-        styledoc = etree.fromstring(ss)
+        styledoc = safe_xml_fromstring(ss)
        transform = etree.XSLT(styledoc)
        result = transform(doc)
--- a/src/calibre/ebooks/conversion/plugins/lit_input.py
+++ b/src/calibre/ebooks/conversion/plugins/lit_input.py
@ -43,7 +43,7 @@ class LITInput(InputFormatPlugin):
                    from calibre.ebooks.txt.processor import convert_basic, \
                        separate_paragraphs_single_line
                    from calibre.ebooks.chardet import xml_to_unicode
-                    from lxml import etree
+                    from calibre.utils.xml_parse import safe_xml_fromstring
                    import copy
                    self.log('LIT file with all text in singe <pre> tag detected')
                    html = separate_paragraphs_single_line(pre.text)
@ -55,7 +55,7 @@ class LITInput(InputFormatPlugin):
                        # SmartyPants skips text inside <pre> tags
                        from calibre.ebooks.conversion.preprocess import smarten_punctuation
                        html = smarten_punctuation(html, self.log)
-                    root = etree.fromstring(html)
+                    root = safe_xml_fromstring(html)
                    body = XPath('//h:body')(root)
                    pre.tag = XHTML('div')
                    pre.text = ''
--- a/src/calibre/ebooks/conversion/plugins/lrf_input.py
+++ b/src/calibre/ebooks/conversion/plugins/lrf_input.py
@ -20,25 +20,19 @@ class LRFInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from lxml import etree
        from calibre.ebooks.lrf.input import (MediaType, Styles, TextBlock,
                Canvas, ImageBlock, RuledLine)
        self.log = log
        self.log('Generating XML')
        from calibre.ebooks.lrf.lrfparser import LRFDocument
        from calibre.utils.xml_parse import safe_xml_fromstring
        from lxml import etree
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            open(u'lrs.xml', 'wb').write(xml.encode('utf-8'))
-        parser = etree.XMLParser(no_network=True, huge_tree=True)
+        doc = safe_xml_fromstring(xml)
        try:
            doc = etree.fromstring(xml, parser=parser)
        except:
            self.log.warn('Failed to parse XML. Trying to recover')
            parser = etree.XMLParser(no_network=True, huge_tree=True,
                    recover=True)
            doc = etree.fromstring(xml, parser=parser)
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
@ -60,7 +54,7 @@ class LRFInput(InputFormatPlugin):
                    plot_map[ro] = imgstr[0].get('file')
        self.log('Converting XML to HTML...')
-        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
+        styledoc = safe_xml_fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
--- a/src/calibre/ebooks/conversion/plugins/rtf_input.py
+++ b/src/calibre/ebooks/conversion/plugins/rtf_input.py
@ -251,6 +251,7 @@ class RTFInput(InputFormatPlugin):
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        from calibre.ebooks.rtf.input import InlineClass
        from calibre.utils.xml_parse import safe_xml_fromstring
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
@ -270,8 +271,7 @@ class RTFInput(InputFormatPlugin):
                self.log.exception('Failed to extract images...')
        self.log('Parsing XML...')
-        parser = etree.XMLParser(recover=True, no_network=True)
+        doc = safe_xml_fromstring(xml)
        doc = etree.fromstring(xml, parser=parser)
        border_styles = self.convert_borders(doc)
        for pict in doc.xpath('//rtf:pict[@num]',
                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
@ -282,7 +282,7 @@ class RTFInput(InputFormatPlugin):
        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
-        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
+        styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True))
        extensions = {('calibre', 'inline-class') : inline_class}
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
--- a/src/calibre/ebooks/conversion/plugins/snb_input.py
+++ b/src/calibre/ebooks/conversion/plugins/snb_input.py
@ -32,10 +32,10 @@ class SNBInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        import uuid
        from lxml import etree
        from calibre.ebooks.oeb.base import DirContainer
        from calibre.ebooks.snb.snbfile import SNBFile
        from calibre.utils.xml_parse import safe_xml_fromstring
        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
@ -52,7 +52,7 @@ class SNBInput(InputFormatPlugin):
                encoding=options.input_encoding, populate=False)
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
-            meta = etree.fromstring(meta)
+            meta = safe_xml_fromstring(meta)
            l = {'title'    : './/head/name',
                  'creator'  : './/head/author',
                  'language' : './/head/language',
@ -87,7 +87,7 @@ class SNBInput(InputFormatPlugin):
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
-                toc = etree.fromstring(toc)
+                toc = safe_xml_fromstring(toc)
                i = 1
                for ch in toc.find('.//body'):
                    chapterName = ch.text
@ -96,7 +96,7 @@ class SNBInput(InputFormatPlugin):
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is None:
                        continue
-                    snbc = etree.fromstring(data)
+                    snbc = safe_xml_fromstring(data)
                    lines = []
                    for line in snbc.find('.//body'):
                        if line.tag == 'text':
--- a/src/calibre/ebooks/docx/container.py
+++ b/src/calibre/ebooks/docx/container.py
@ -18,11 +18,12 @@ from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.localization import canonicalize_lang
 from calibre.utils.logging import default_log
 from calibre.utils.zipfile import ZipFile
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
 def fromstring(raw, parser=RECOVER_PARSER):
-    return etree.fromstring(raw, parser=parser)
+    return safe_xml_fromstring(raw)
 # Read metadata {{{
--- a/src/calibre/ebooks/docx/dump.py
+++ b/src/calibre/ebooks/docx/dump.py
@ -11,6 +11,7 @@ from lxml import etree
 from calibre import walk
 from calibre.utils.zipfile import ZipFile
 from calibre.utils.xml_parse import safe_xml_fromstring
 def pretty_all_xml_in_dir(path):
@ -19,7 +20,7 @@ def pretty_all_xml_in_dir(path):
            with open(f, 'r+b') as stream:
                raw = stream.read()
                if raw:
-                    root = etree.fromstring(raw)
+                    root = safe_xml_fromstring(raw)
                    stream.seek(0)
                    stream.truncate()
                    stream.write(etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True))
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -17,6 +17,7 @@ from lxml import etree
 from calibre import prepare_string_for_xml
 from calibre.constants import __appname__, __version__
 from calibre.utils.localization import lang_as_iso639_1
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.utils.img import save_cover_data_to
 from calibre.ebooks.oeb.base import urlnormalize
 from polyglot.builtins import unicode_type, string_or_bytes, range, filter
@ -69,7 +70,7 @@ class FB2MLizer(object):
        output = self.clean_text('\n'.join(output))
        if self.opts.pretty_print:
-            output = etree.tostring(etree.fromstring(output), encoding='unicode', pretty_print=True)
+            output = etree.tostring(safe_xml_fromstring(output), encoding='unicode', pretty_print=True)
        return '<?xml version="1.0" encoding="UTF-8"?>\n' + output
--- a/src/calibre/ebooks/metadata/docx.py
+++ b/src/calibre/ebooks/metadata/docx.py
@ -8,9 +8,8 @@ __docformat__ = 'restructuredtext en'
 from io import BytesIO
 from lxml import etree
 from calibre.ebooks.docx.container import DOCX
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.docx.writer.container import update_doc_props, xml2str
 from calibre.utils.imghdr import identify
@ -61,11 +60,11 @@ def set_metadata(stream, mi):
        ap_raw = c.read(ap_name)
    except Exception:
        ap_raw = None
-    cp = etree.fromstring(dp_raw)
+    cp = safe_xml_fromstring(dp_raw)
    update_doc_props(cp, mi, c.namespace)
    replacements = {}
    if ap_raw is not None:
-        ap = etree.fromstring(ap_raw)
+        ap = safe_xml_fromstring(ap_raw)
        comp = ap.makeelement('{%s}Company' % c.namespace.namespaces['ep'])
        for child in tuple(ap):
            if child.tag == comp.tag:
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@ -12,13 +12,12 @@ import os
 import posixpath
 from contextlib import closing
 from lxml import etree
 from calibre import CurrentDir
 from calibre.ebooks.metadata.opf import (
    get_metadata as get_metadata_from_opf, set_metadata as set_metadata_opf
 )
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.localunzip import LocalZipFile
 from calibre.utils.zipfile import BadZipfile, ZipFile, safe_replace
@ -42,7 +41,7 @@ class Container(dict):
    def __init__(self, stream=None):
        if not stream:
            return
-        container = etree.fromstring(stream.read())
+        container = safe_xml_fromstring(stream.read())
        if container.get('version', None) != '1.0':
            raise EPubException("unsupported version of OCF")
        rootfiles = container.xpath('./*[local-name()="rootfiles"]')
@ -70,8 +69,7 @@ class Encryption(object):
            'http://www.idpf.org/2008/embedding'])
    def __init__(self, raw):
-        from lxml import etree
+        self.root = safe_xml_fromstring(raw) if raw else None
        self.root = etree.fromstring(raw) if raw else None
        self.entries = {}
        if self.root is not None:
            for em in self.root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
--- a/src/calibre/ebooks/metadata/fb2.py
+++ b/src/calibre/ebooks/metadata/fb2.py
@ -15,6 +15,7 @@ from lxml import etree
 from calibre.utils.date import parse_only_date
 from calibre.utils.img import save_cover_data_to
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.utils.imghdr import identify
 from calibre import guess_type, guess_all_extensions, prints, force_unicode
 from calibre.ebooks.metadata import MetaInformation, check_isbn
@ -315,9 +316,8 @@ def _parse_language(root, mi, ctx):
 def _get_fbroot(raw):
    parser = etree.XMLParser(recover=True, no_network=True)
    raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
-    root = etree.fromstring(raw, parser=parser)
+    root = safe_xml_fromstring(raw)
    return ensure_namespace(root)
@ -452,5 +452,5 @@ def ensure_namespace(doc):
        import re
        raw = etree.tostring(doc, encoding='unicode')
        raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
-        doc = etree.fromstring(raw)
+        doc = safe_xml_fromstring(raw)
    return doc
--- a/src/calibre/ebooks/metadata/lrx.py
+++ b/src/calibre/ebooks/metadata/lrx.py
@ -11,9 +11,9 @@ Read metadata from LRX files
 import struct
 from zlib import decompress
 from lxml import etree
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
 from calibre.utils.xml_parse import safe_xml_fromstring
 def _read(f, at, amount):
@ -66,7 +66,7 @@ def get_metadata(f):
        info = decompress(f.read(compressed_size))
        if len(info) != uncompressed_size:
            raise ValueError('LRX file has malformed metadata section')
-        root = etree.fromstring(info)
+        root = safe_xml_fromstring(info)
        bi = root.find('BookInfo')
        title = bi.find('Title')
        title_sort = title.get('reading', None)
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -23,6 +23,7 @@ from calibre.utils.localization import get_lang, canonicalize_lang
 from calibre import prints, guess_type
 from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from calibre.utils.config import tweaks
 from calibre.utils.xml_parse import safe_xml_fromstring
 from polyglot.builtins import iteritems, unicode_type, getcwd, map
 from polyglot.urllib import unquote, urlparse
@ -1588,7 +1589,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
                is None else default_lang)
        mi.languages = [lang]
-    root = etree.fromstring(textwrap.dedent(
+    root = safe_xml_fromstring(textwrap.dedent(
    '''
    <package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
        <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
--- a/src/calibre/ebooks/metadata/opf3_test.py
+++ b/src/calibre/ebooks/metadata/opf3_test.py
@ -7,9 +7,8 @@ from collections import defaultdict
 from io import BytesIO
 import unittest
 from lxml import etree
 from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.metadata.opf3 import (
    parse_prefixes, reserved_prefixes, expand_prefix, read_identifiers,
@ -37,7 +36,7 @@ class TestOPF3(unittest.TestCase):
    ae = unittest.TestCase.assertEqual
    def get_opf(self, metadata='', manifest=''):
-        return etree.fromstring(TEMPLATE.format(metadata=metadata, manifest=manifest))
+        return safe_xml_fromstring(TEMPLATE.format(metadata=metadata, manifest=manifest))
    def test_prefix_parsing(self):  # {{{
        self.ae(parse_prefixes('foaf: http://xmlns.com/foaf/spec/\n dbp: http://dbpedia.org/ontology/'),
@ -523,7 +522,7 @@ class TestOPF3(unittest.TestCase):
                    self.ae(v2, v3, '%s: %r != %r' % (field, v2, v3))
        mi2 = OPF(BytesIO(raw.encode('utf-8'))).to_book_metadata()
-        root = etree.fromstring(raw)
+        root = safe_xml_fromstring(raw)
        root.set('version', '3.0')
        mi3, _, raster_cover, first_spine_item  = read_metadata(root, return_extra_data=True)
        self.assertIsNone(raster_cover)
--- a/src/calibre/ebooks/metadata/snb.py
+++ b/src/calibre/ebooks/metadata/snb.py
@ -9,7 +9,7 @@ import os
 import io
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.snb.snbfile import SNBFile
-from lxml import etree
+from calibre.utils.xml_parse import safe_xml_fromstring
 def get_metadata(stream, extract_cover=True):
@ -27,7 +27,7 @@ def get_metadata(stream, extract_cover=True):
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
-            meta = etree.fromstring(meta)
+            meta = safe_xml_fromstring(meta)
            mi.title = meta.find('.//head/name').text
            mi.authors = [meta.find('.//head/author').text]
            mi.language = meta.find('.//head/language').text.lower().replace('_', '-')
--- a/src/calibre/ebooks/metadata/sources/douban.py
+++ b/src/calibre/ebooks/metadata/sources/douban.py
@ -49,7 +49,7 @@ class Douban(Source):
    name = 'Douban Books'
    author = 'Li Fanxi'
-    version = (2, 1, 1)
+    version = (2, 1, 2)
    minimum_calibre_version = (2, 80, 0)
    description = _('Downloads metadata and covers from Douban.com. '
@ -119,8 +119,10 @@ class Douban(Source):
        try:
            log.info(id_url)
            raw = get_details(browser, id_url, timeout)
-            feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
+            feed = etree.fromstring(
-                strip_encoding_pats=True)[0])
+                xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0],
                parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
            )
            extra = entry(feed)[0]
        except:
            log.exception('Failed to get additional details for', mi.title)
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@ -105,7 +105,8 @@ def to_metadata(browser, log, entry_, timeout):  # {{{
    try:
        raw = get_details(browser, id_url, timeout)
        feed = etree.fromstring(
-            xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0]
+            xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0],
            parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
        )
        extra = entry(feed)[0]
    except:
@ -173,7 +174,7 @@ def to_metadata(browser, log, entry_, timeout):  # {{{
 class GoogleBooks(Source):
    name = 'Google'
-    version = (1, 0, 0)
+    version = (1, 0, 1)
    minimum_calibre_version = (2, 80, 0)
    description = _('Downloads metadata and covers from Google Books')
@ -371,10 +372,9 @@ class GoogleBooks(Source):
            return as_unicode(e)
        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(
                xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0],
-                parser=parser
+                parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
            )
            entries = entry(feed)
        except Exception as e:
--- a/src/calibre/ebooks/metadata/toc.py
+++ b/src/calibre/ebooks/metadata/toc.py
@ -12,6 +12,7 @@ from lxml.builder import ElementMaker
 from calibre.constants import __appname__, __version__
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.utils.cleantext import clean_xml_chars
 from polyglot.builtins import unicode_type, getcwd
 from polyglot.urllib import unquote, urlparse
@ -177,8 +178,7 @@ class TOC(list):
            with open(toc, 'rb') as f:
                raw  = xml_to_unicode(f.read(), assume_utf8=True,
                        strip_encoding_pats=True)[0]
-            root = etree.fromstring(raw, parser=etree.XMLParser(recover=True,
+            root = safe_xml_fromstring(raw)
                no_network=True))
        xpn = {'re': 'http://exslt.org/regular-expressions'}
        XPath = functools.partial(etree.XPath, namespaces=xpn)
--- a/src/calibre/ebooks/metadata/utils.py
+++ b/src/calibre/ebooks/metadata/utils.py
@ -6,17 +6,15 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 from collections import namedtuple
 from polyglot.builtins import map
 from lxml import etree
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.base import OPF
 from calibre.ebooks.oeb.polish.utils import guess_type
 from calibre.spell import parse_lang_code
 from calibre.utils.localization import lang_as_iso639_1
 from calibre.utils.xml_parse import safe_xml_fromstring
 from polyglot.builtins import filter
 PARSER = etree.XMLParser(recover=True, no_network=True)
 OPFVersion = namedtuple('OPFVersion', 'major minor patch')
@ -45,7 +43,7 @@ def parse_opf(stream_or_path):
        raise ValueError('Empty file: '+getattr(stream, 'name', 'stream'))
    raw, encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True, assume_utf8=True)
    raw = raw[raw.find('<'):]
-    root = etree.fromstring(raw, PARSER)
+    root = safe_xml_fromstring(raw)
    if root is None:
        raise ValueError('Not an OPF file')
    return root
--- a/src/calibre/ebooks/metadata/xmp.py
+++ b/src/calibre/ebooks/metadata/xmp.py
@ -14,6 +14,7 @@ from lxml.builder import ElementMaker
 from calibre import prints
 from calibre.ebooks.metadata import check_isbn, check_doi
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.opf2 import dump_dict
 from calibre.utils.date import parse_date, isoformat, now
@ -74,9 +75,9 @@ def parse_xmp_packet(raw_bytes):
            enc = emap.get(m.group(1), enc)
            break
    if enc is None:
-        return etree.fromstring(raw_bytes)
+        return safe_xml_fromstring(raw_bytes)
    raw = _xml_declaration.sub('', raw_bytes.decode(enc))  # lxml barfs if encoding declaration present in unicode string
-    return etree.fromstring(raw)
+    return safe_xml_fromstring(raw)
 def serialize_xmp_packet(root, encoding='utf-8'):
--- a/src/calibre/ebooks/mobi/writer8/toc.py
+++ b/src/calibre/ebooks/mobi/writer8/toc.py
@ -6,8 +6,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-from lxml import etree
+from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.oeb.base import (urlnormalize, XPath, XHTML_NS, XHTML,
        XHTML_MIME, css_text)
@ -88,7 +87,7 @@ class TOCAdder(object):
                    'body { font-family: %s }'%s.body_font_family]
            embed_css = '\n\n'.join(css)
-        root = etree.fromstring(TEMPLATE.format(xhtmlns=XHTML_NS,
+        root = safe_xml_fromstring(TEMPLATE.format(xhtmlns=XHTML_NS,
            title=self.title, embed_css=embed_css,
            extra_css=(opts.extra_css or '')))
        parent = XPath('//h:ul')(root)[0]
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@ -19,6 +19,7 @@ from odf.namespaces import TEXTNS as odTEXTNS
 from calibre import CurrentDir, walk
 from calibre.ebooks.oeb.base import _css_logger
 from calibre.utils.xml_parse import safe_xml_fromstring
 from polyglot.builtins import unicode_type, string_or_bytes, filter, getcwd, as_bytes
@ -45,7 +46,7 @@ class Extract(ODF2XHTML):
                ol.set('start', val)
    def fix_markup(self, html, log):
-        root = etree.fromstring(html)
+        root = safe_xml_fromstring(html)
        self.filter_css(root, log)
        self.extract_css(root, log)
        self.epubify_markup(root, log)
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -16,11 +16,11 @@ from lxml import etree, html
 from calibre import force_unicode
 from calibre.constants import filesystem_encoding, __version__, ispy3
 from calibre.translations.dynamic import translate
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.conversion.preprocess import CSSPreProcessor
 from calibre import (isbytestring, as_unicode, get_types_map)
-from calibre.ebooks.oeb.parse_utils import (barename, XHTML_NS, RECOVER_PARSER,
+from calibre.ebooks.oeb.parse_utils import barename, XHTML_NS, namespace, XHTML, parse_html, NotHTML
        namespace, XHTML, parse_html, NotHTML)
 from calibre.utils.cleantext import clean_xml_chars
 from calibre.utils.short_uuid import uuid4
 from polyglot.builtins import iteritems, unicode_type, string_or_bytes, range, itervalues, filter, codepoint_to_chr
@ -946,7 +946,7 @@ class Manifest(object):
                return
            data = xml_to_unicode(data, strip_encoding_pats=True,
                    assume_utf8=True, resolve_entities=True)[0]
-            return etree.fromstring(data, parser=RECOVER_PARSER)
+            return safe_xml_fromstring(data)
        def _parse_xhtml(self, data):
            orig_data = data
--- a/src/calibre/ebooks/oeb/parse_utils.py
+++ b/src/calibre/ebooks/oeb/parse_utils.py
@ -11,6 +11,7 @@ import re
 from lxml import etree, html
 from calibre import xml_replace_entities, force_unicode
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.constants import filesystem_encoding
 from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
 from polyglot.builtins import iteritems, itervalues, unicode_type, string_or_bytes, map
@ -114,12 +115,7 @@ def _html4_parse(data):
            elem.text = elem.text.strip('-')
    data = etree.tostring(data, encoding='unicode')
-    # Setting huge_tree=True causes crashes in windows with large files
+    data = safe_xml_fromstring(data)
    parser = etree.XMLParser(no_network=True)
    try:
        data = etree.fromstring(data, parser=parser)
    except etree.XMLSyntaxError:
        data = etree.fromstring(data, parser=RECOVER_PARSER)
    return data
@ -210,19 +206,16 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
    data = data.replace('\0', '')
    data = raw = clean_word_doc(data, log)
    # Setting huge_tree=True causes crashes in windows with large files
    parser = etree.XMLParser(no_network=True)
    # Try with more & more drastic measures to parse
    try:
-        data = etree.fromstring(data, parser=parser)
+        data = safe_xml_fromstring(data)
        check_for_html5(pre, data)
    except (HTML5Doc, etree.XMLSyntaxError):
        log.debug('Initial parse failed, using more'
                ' forgiving parsers')
        raw = data = xml_replace_entities(raw)
        try:
-            data = etree.fromstring(data, parser=parser)
+            data = safe_xml_fromstring(data)
            check_for_html5(pre, data)
        except (HTML5Doc, etree.XMLSyntaxError):
            log.debug('Parsing %s as HTML' % filename)
@ -251,7 +244,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
        if barename(data.tag) in non_html_file_tags:
            raise NotHTML(data.tag)
        log.warn('File %r does not appear to be (X)HTML'%filename)
-        nroot = etree.fromstring('<html></html>')
+        nroot = safe_xml_fromstring('<html></html>')
        has_body = False
        for child in list(data):
            if isinstance(child.tag, (unicode_type, bytes)) and barename(child.tag) == 'body':
@ -260,7 +253,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
        parent = nroot
        if not has_body:
            log.warn('File %r appears to be a HTML fragment'%filename)
-            nroot = etree.fromstring('<html><body/></html>')
+            nroot = safe_xml_fromstring('<html><body/></html>')
            parent = nroot[0]
        for child in list(data.iter()):
            oparent = child.getparent()
@ -276,12 +269,12 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
        data = etree.tostring(data, encoding='unicode')
        try:
-            data = etree.fromstring(data, parser=parser)
+            data = safe_xml_fromstring(data)
        except:
            data = data.replace(':=', '=').replace(':>', '>')
            data = data.replace('<http:/>', '')
            try:
-                data = etree.fromstring(data, parser=parser)
+                data = safe_xml_fromstring(data)
            except etree.XMLSyntaxError:
                log.warn('Stripping comments from %s'%
                        filename)
@ -292,12 +285,11 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
                    '')
                data = data.replace("<?xml version='1.0' encoding='utf-8'??>", '')
                try:
-                    data = etree.fromstring(data,
+                    data = safe_xml_fromstring(data)
                            parser=RECOVER_PARSER)
                except etree.XMLSyntaxError:
                    log.warn('Stripping meta tags from %s'% filename)
                    data = re.sub(r'<meta\s+[^>]+?>', '', data)
-                    data = etree.fromstring(data, parser=RECOVER_PARSER)
+                    data = safe_xml_fromstring(data)
    elif namespace(data.tag) != XHTML_NS:
        # OEB_DOC_NS, but possibly others
        ns = namespace(data.tag)
--- a/src/calibre/ebooks/oeb/polish/check/parsing.py
+++ b/src/calibre/ebooks/oeb/polish/check/parsing.py
@ -7,11 +7,12 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import re
-from lxml.etree import XMLParser, fromstring, XMLSyntaxError
+from lxml.etree import XMLSyntaxError
 import css_parser
 from calibre import force_unicode, human_readable, prepare_string_for_xml
 from calibre.ebooks.chardet import replace_encoding_declarations, find_declared_encoding
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.html_entities import html5_entities
 from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style as fix_style_tag
 from calibre.ebooks.oeb.polish.utils import PositionFinder, guess_type
@ -276,7 +277,6 @@ def check_xml_parsing(name, mt, raw):
    # Get rid of entities as named entities trip up the XML parser
    eproc = EntitityProcessor(mt)
    eraw = entity_pat.sub(eproc, raw)
    parser = XMLParser(recover=False)
    errcls = HTMLParseError if mt in OEB_DOCS else XMLParseError
    errors = []
    if eproc.ok_named_entities:
@ -288,7 +288,7 @@ def check_xml_parsing(name, mt, raw):
            errors.append(BadEntity(ent, name, lnum, col))
    try:
-        root = fromstring(eraw, parser=parser)
+        root = safe_xml_fromstring(eraw, recover=False)
    except UnicodeDecodeError:
        return errors + [DecodeError(name)]
    except XMLSyntaxError as err:
--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@ -18,7 +18,6 @@ from io import BytesIO
 from itertools import count
 from css_parser import getUrls, replaceUrls
 from lxml import etree
 from calibre import CurrentDir, walk
 from calibre.constants import iswindows
@ -42,7 +41,7 @@ from calibre.ebooks.oeb.base import (
    DC11_NS, OEB_DOCS, OEB_STYLES, OPF, OPF2_NS, Manifest, itercsslinks, iterlinks,
    rewrite_links, serialize, urlquote, urlunquote
 )
-from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER, NotHTML, parse_html
+from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html
 from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook
 from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
 from calibre.ebooks.oeb.polish.utils import (
@ -52,6 +51,7 @@ from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryF
 from calibre.utils.filenames import hardlink_file, nlinks_file
 from calibre.utils.ipc.simple_worker import WorkerError, fork_job
 from calibre.utils.logging import default_log
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.utils.zipfile import ZipFile
 from polyglot.builtins import iteritems, map, unicode_type, zip
 from polyglot.urllib import urlparse
@ -201,7 +201,7 @@ class ContainerBase(object):  # {{{
        data, self.used_encoding = xml_to_unicode(
            data, strip_encoding_pats=True, assume_utf8=True, resolve_entities=True)
        data = unicodedata.normalize('NFC', data)
-        return etree.fromstring(data, parser=RECOVER_PARSER)
+        return safe_xml_fromstring(data)
    def parse_xhtml(self, data, fname='<string>', force_html5_parse=False):
        if self.tweak_mode:
@ -1178,7 +1178,7 @@ class EpubContainer(Container):
        container_path = join(self.root, 'META-INF', 'container.xml')
        if not exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
-        container = etree.fromstring(open(container_path, 'rb').read())
+        container = safe_xml_fromstring(open(container_path, 'rb').read())
        opf_files = container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%guess_type('a.opf')
--- a/src/calibre/ebooks/oeb/polish/parsing.py
+++ b/src/calibre/ebooks/oeb/polish/parsing.py
@ -7,10 +7,11 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import re
-from lxml.etree import XMLParser, fromstring, Element as LxmlElement
+from lxml.etree import Element as LxmlElement
 import html5_parser
 from calibre import xml_replace_entities
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
 from calibre.utils.cleantext import clean_xml_chars
 from polyglot.builtins import unicode_type
@ -77,8 +78,7 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N
    if force_html5_parse:
        return parse_html5(raw, log=log, line_numbers=line_numbers, linenumber_attribute=linenumber_attribute, replace_entities=False, fix_newlines=False)
    try:
-        parser = XMLParser(no_network=True)
+        ans = safe_xml_fromstring(raw)
        ans = fromstring(raw, parser=parser)
        if ans.tag != '{%s}html' % XHTML_NS:
            raise ValueError('Root tag is not <html> in the XHTML namespace')
        if linenumber_attribute:
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -21,6 +21,7 @@ from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \
                                    urlnormalize, BINARY_MIME, \
                                    OEBError, OEBBook, DirContainer
 from calibre.ebooks.oeb.writer import OEBWriter
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.utils.cleantext import clean_xml_chars
 from calibre.utils.localization import get_lang
 from calibre.ptempfile import TemporaryDirectory
@ -108,23 +109,18 @@ class OEBReader(object):
        data = re.sub(r'http://openebook.org/namespaces/oeb-package/1.0(/*)',
                OPF1_NS, data)
        try:
-            opf = etree.fromstring(data)
+            opf = safe_xml_fromstring(data)
        except etree.XMLSyntaxError:
            data = xml_replace_entities(clean_xml_chars(data), encoding=None)
            try:
-                opf = etree.fromstring(data)
+                opf = safe_xml_fromstring(data)
                self.logger.warn('OPF contains invalid HTML named entities')
            except etree.XMLSyntaxError:
                data = re.sub(r'(?is)<tours>.+</tours>', '', data)
                data = data.replace('<dc-metadata>',
                    '<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core">')
-                try:
+                opf = safe_xml_fromstring(data)
                    opf = etree.fromstring(data)
                self.logger.warn('OPF contains invalid tours section')
                except etree.XMLSyntaxError:
                    self.logger.warn('OPF contains invalid markup, trying to parse it anyway')
                    from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
                    opf = etree.fromstring(data, parser=RECOVER_PARSER)
        ns = namespace(opf.tag)
        if ns not in ('', OPF1_NS, OPF2_NS):
--- a/src/calibre/ebooks/oeb/transforms/cover.py
+++ b/src/calibre/ebooks/oeb/transforms/cover.py
@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en'
 import textwrap
 from lxml import etree
 from calibre import guess_type
 from calibre.utils.imghdr import identify
 from calibre.utils.xml_parse import safe_xml_fromstring
 from polyglot.builtins import unicode_type
 from polyglot.urllib import unquote
@ -156,7 +156,7 @@ class CoverManager(object):
                tp = templ%unquote(href)
                id, href = m.generate('titlepage', 'titlepage.xhtml')
                item = m.add(id, href, guess_type('t.xhtml')[0],
-                        data=etree.fromstring(tp))
+                        data=safe_xml_fromstring(tp))
        else:
            item = self.oeb.manifest.hrefs[
                    urldefrag(self.oeb.guide['titlepage'].href)[0]]
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@ -129,9 +129,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
 def parse_outline(raw, output_dir):
    from lxml import etree
-    from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
+    from calibre.utils.xml_parse import safe_xml_fromstring
    raw = clean_xml_chars(xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0])
-    outline = etree.fromstring(raw, parser=RECOVER_PARSER).xpath('(//outline)[1]')
+    outline = safe_xml_fromstring(raw).xpath('(//outline)[1]')
    if outline:
        from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
        outline = outline[0]
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@ -12,6 +12,7 @@ from itertools import count
 from lxml import etree
 from polyglot.builtins import range, map
 from calibre.utils.xml_parse import safe_xml_fromstring
 class Font(object):
@ -622,8 +623,7 @@ class PDFDocument(object):
    def __init__(self, xml, opts, log):
        self.opts, self.log = opts, log
-        parser = etree.XMLParser(recover=True)
+        self.root = safe_xml_fromstring(xml)
        self.root = etree.fromstring(xml, parser=parser)
        idc = count()
        self.fonts = []
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -14,6 +14,7 @@ import re
 from lxml import etree
 from calibre.ebooks.pdb.ereader import image_name
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.pml import unipmlcode
 from polyglot.builtins import unicode_type, string_or_bytes
@ -138,7 +139,7 @@ class PMLMLizer(object):
            self.log.debug('Converting %s to PML markup...' % item.href)
            content = etree.tostring(item.data, encoding='unicode')
            content = self.prepare_text(content)
-            content = etree.fromstring(content)
+            content = safe_xml_fromstring(content)
            stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            text.append(self.add_page_anchor(item))
            text += self.dump_text(content.find(XHTML('body')), stylizer, item)
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@ -109,6 +109,7 @@ class RTFMLizer(object):
    def mlize_spine(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.utils.xml_parse import safe_xml_fromstring
        output = self.header()
        if 'titlepage' in self.oeb_book.guide:
            href = self.oeb_book.guide['titlepage'].href
@ -126,7 +127,7 @@ class RTFMLizer(object):
            content = re.sub('<!--.*?-->', '', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL)
            content = self.remove_newlines(content)
            content = self.remove_tabs(content)
-            content = etree.fromstring(content)
+            content = safe_xml_fromstring(content)
            stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            self.currently_dumping_item = item
            output += self.dump_text(content.find(XHTML('body')), stylizer)
--- a/src/calibre/ebooks/snb/snbml.py
+++ b/src/calibre/ebooks/snb/snbml.py
@ -84,6 +84,7 @@ class SNBMLizer(object):
    def mlize(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.utils.xml_parse import safe_xml_fromstring
        output = [u'']
        stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
        content = etree.tostring(self.item.data.find(XHTML('body')), encoding='unicode')
@ -98,7 +99,7 @@ class SNBMLizer(object):
            etree.SubElement(snbcTree, "body")
            trees[subitem] = snbcTree
        output.append('%s%s\n\n' % (CALIBRE_SNB_BM_TAG, ""))
-        output += self.dump_text(self.subitems, etree.fromstring(content), stylizer)[0]
+        output += self.dump_text(self.subitems, safe_xml_fromstring(content), stylizer)[0]
        output = self.cleanup_text(''.join(output))
        subitem = ''
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@ -67,6 +67,7 @@ class TXTMLizer(object):
    def mlize_spine(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.utils.xml_parse import safe_xml_fromstring
        output = [u'']
        output.append(self.get_toc())
        for item in self.oeb_book.spine:
@ -76,7 +77,7 @@ class TXTMLizer(object):
                    x.text = x.text.replace('--', '__')
            content = etree.tostring(item.data, encoding='unicode')
            content = self.remove_newlines(content)
-            content = etree.fromstring(content)
+            content = safe_xml_fromstring(content)
            stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            output += self.dump_text(content.find(XHTML('body')), stylizer, item)
            output += '\n\n\n\n\n\n'
--- a/src/calibre/gui2/dialogs/opml.py
+++ b/src/calibre/gui2/dialogs/opml.py
@ -15,6 +15,7 @@ from PyQt5.Qt import (
 from lxml import etree
 from calibre.gui2 import choose_files, error_dialog
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.utils.icu import sort_key
 from polyglot.builtins import unicode_type
@ -32,7 +33,7 @@ def uniq(vals, kmap=lambda x:x):
 def import_opml(raw, preserve_groups=True):
-    root = etree.fromstring(raw)
+    root = safe_xml_fromstring(raw)
    groups = defaultdict(list)
    ax = etree.XPath('ancestor::outline[@title or @text]')
    for outline in root.xpath('//outline[@type="rss" and @xmlUrl]'):
--- a/src/calibre/gui2/store/opensearch_store.py
+++ b/src/calibre/gui2/store/opensearch_store.py
@ -8,12 +8,11 @@ __docformat__ = 'restructuredtext en'
 from contextlib import closing
 from lxml import etree
 from PyQt5.Qt import QUrl
 from calibre import (browser, guess_extension)
 from calibre.gui2 import open_url
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
@ -36,7 +35,7 @@ def open_search(url, query, max_results=10, timeout=60):
    counter = max_results
    br = browser()
    with closing(br.open(url, timeout=timeout)) as f:
-        doc = etree.fromstring(f.read())
+        doc = safe_xml_fromstring(f.read())
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break
--- a/src/calibre/gui2/store/stores/gutenberg_plugin.py
+++ b/src/calibre/gui2/store/stores/gutenberg_plugin.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
-store_version = 5  # Needed for dynamic plugin loading
+store_version = 6  # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, 2013, John Schember <john@nachtimwald.com>'
@ -43,7 +43,7 @@ def search(query, max_results=10, timeout=60, write_raw_to=None):
        if write_raw_to is not None:
            with open(write_raw_to, 'wb') as f:
                f.write(raw)
-        doc = etree.fromstring(raw)
+        doc = etree.fromstring(raw, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break
@ -63,7 +63,7 @@ def search(query, max_results=10, timeout=60, write_raw_to=None):
            # Get the formats and direct download links.
            with closing(br.open(id, timeout=timeout/4)) as nf:
-                ndoc = etree.fromstring(nf.read())
+                ndoc = etree.fromstring(nf.read(), parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
                for link in ndoc.xpath('//*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'):
                    type = link.get('type')
                    href = link.get('href')
--- a/src/calibre/gui2/store/stores/litres_plugin.py
+++ b/src/calibre/gui2/store/stores/litres_plugin.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
-store_version = 1  # Needed for dynamic plugin loading
+store_version = 2  # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
@ -63,8 +63,7 @@ class LitResStore(BasicStoreConfig, StorePlugin):
            ungzipResponse(r,br)
            raw= xml_to_unicode(r.read(), strip_encoding_pats=True, assume_utf8=True)[0]
-            parser = etree.XMLParser(recover=True, no_network=True)
+            doc = etree.fromstring(raw, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
            doc = etree.fromstring(raw, parser=parser)
            for data in doc.xpath('//*[local-name() = "fb2-book"]'):
                if counter <= 0:
                    break
--- a/src/calibre/gui2/store/stores/manybooks_plugin.py
+++ b/src/calibre/gui2/store/stores/manybooks_plugin.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
-store_version = 1  # Needed for dynamic plugin loading
+store_version = 2  # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -46,7 +46,7 @@ def search_manybooks(query, max_results=10, timeout=60, open_search_url='http://
    with closing(br.open(url, timeout=timeout)) as f:
        raw_data = f.read()
        raw_data = raw_data.decode('utf-8', 'replace')
-        doc = etree.fromstring(raw_data)
+        doc = etree.fromstring(raw_data, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break
@ -71,7 +71,7 @@ def search_manybooks(query, max_results=10, timeout=60, open_search_url='http://
            # Follow the detail link to get the rest of the info.
            with closing(br.open(detail_href, timeout=timeout/4)) as df:
-                ddoc = etree.fromstring(df.read())
+                ddoc = etree.fromstring(df.read(), parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
                ddata = ddoc.xpath('//*[local-name() = "entry"][1]')
                if ddata:
                    ddata = ddata[0]
--- a/src/calibre/gui2/store/stores/xinxii_plugin.py
+++ b/src/calibre/gui2/store/stores/xinxii_plugin.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
-store_version = 1  # Needed for dynamic plugin loading
+store_version = 2  # Needed for dynamic plugin loading
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -47,7 +47,7 @@ class XinXiiStore(BasicStoreConfig, OpenSearchOPDSStore):
        counter = max_results
        br = browser()
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = etree.fromstring(f.read())
+            doc = etree.fromstring(f.read(), parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
            for data in doc.xpath('//*[local-name() = "entry"]'):
                if counter <= 0:
                    break
--- a/src/calibre/gui2/tweak_book/diff/view.py
+++ b/src/calibre/gui2/tweak_book/diff/view.py
@ -28,6 +28,7 @@ from calibre.gui2.tweak_book.editor.text import PlainTextEdit, default_font_fami
 from calibre.gui2.tweak_book.editor.themes import theme_color, get_theme
 from calibre.gui2.tweak_book.diff import get_sequence_matcher
 from calibre.gui2.tweak_book.diff.highlight import get_highlighter
 from calibre.utils.xml_parse import safe_xml_fromstring
 Change = namedtuple('Change', 'ltop lbot rtop rbot kind')
@ -47,7 +48,7 @@ def beautify_text(raw, syntax):
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
    from calibre.ebooks.chardet import strip_encoding_declarations
    if syntax == 'xml':
-        root = etree.fromstring(strip_encoding_declarations(raw))
+        root = safe_xml_fromstring(strip_encoding_declarations(raw))
        pretty_xml_tree(root)
    elif syntax == 'css':
        import logging
--- a/src/calibre/library/catalogs/epub_mobi_builder.py
+++ b/src/calibre/library/catalogs/epub_mobi_builder.py
@ -21,6 +21,7 @@ from calibre import (
    replace_entities, strftime, xml_replace_entities
 )
 from calibre.constants import cache_dir, isosx
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.customize.conversion import DummyReporter
 from calibre.customize.ui import output_profiles
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, prettify
@ -2992,7 +2993,7 @@ class CatalogBuilder(object):
            <navMap/>
            </ncx>
        '''
-        root = self.ncx_root = etree.fromstring(header)
+        root = self.ncx_root = safe_xml_fromstring(header)
        navMapTag = root[0]
        if self.generate_for_kindle_mobi:
@ -3668,7 +3669,7 @@ class CatalogBuilder(object):
                lang=prepare_string_for_xml(lang),
                pt="periodical:default" if self.generate_for_kindle_mobi else ""
        )
-        root = etree.fromstring(header)
+        root = safe_xml_fromstring(header)
        manifest = root.xpath('//*[local-name()="manifest"]')[0]
        spine = root.xpath('//*[local-name()="spine"]')[0]
        guide = root.xpath('//*[local-name()="guide"]')[0]
--- a/src/calibre/spell/import_from.py
+++ b/src/calibre/spell/import_from.py
@ -10,6 +10,7 @@ import sys, glob, os, tempfile, re, codecs
 from lxml import etree
 from calibre.constants import config_dir
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.utils.zipfile import ZipFile
 from polyglot.builtins import iteritems
@ -26,7 +27,7 @@ BUILTIN_LOCALES = {'en-US', 'en-GB', 'es-ES'}
 def parse_xcu(raw, origin='%origin%'):
    ' Get the dictionary and affix file names as well as supported locales for each dictionary '
    ans = {}
-    root = etree.fromstring(raw)
+    root = safe_xml_fromstring(raw)
    for node in XPath('//prop[@oor:name="Format"]/value[text()="DICT_SPELL"]/../..')(root):
        value = XPath('descendant::prop[@oor:name="Locations"]/value')(node)
@ -123,7 +124,7 @@ def import_from_oxt(source_path, name, dest_dir=None, prefix='dic-'):
                    key = key[3:]
                return zf.open(key.lstrip('/')).read()
-        root = etree.fromstring(zf.open('META-INF/manifest.xml').read())
+        root = safe_xml_fromstring(zf.open('META-INF/manifest.xml').read())
        xcu = XPath('//manifest:file-entry[@manifest:media-type="application/vnd.sun.star.configuration-data"]')(root)[0].get(
            '{%s}full-path' % NS_MAP['manifest'])
        for (dic, aff), locales in iteritems(parse_xcu(zf.open(xcu).read(), origin='')):
--- a/src/calibre/srv/opds.py
+++ b/src/calibre/srv/opds.py
@ -15,6 +15,7 @@ from lxml.builder import ElementMaker
 from calibre.constants import __appname__
 from calibre.db.view import sanitize_sort_field_name
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.ebooks.metadata import fmt_sidx, authors_to_string, rating_to_stars
 from calibre.library.comments import comments_to_html
 from calibre import guess_type, prepare_string_for_xml as xml
@ -123,7 +124,7 @@ def html_to_lxml(raw):
    root.set('xmlns', "http://www.w3.org/1999/xhtml")
    raw = etree.tostring(root, encoding=None)
    try:
-        return etree.fromstring(raw)
+        return safe_xml_fromstring(raw)
    except:
        for x in root.iterdescendants():
            remove = []
@ -134,7 +135,7 @@ def html_to_lxml(raw):
                del x.attrib[a]
        raw = etree.tostring(root, encoding=None)
        try:
-            return etree.fromstring(raw)
+            return safe_xml_fromstring(raw)
        except:
            from calibre.ebooks.oeb.parse_utils import _html4_parse
            return _html4_parse(raw)
--- a/src/calibre/utils/opensearch/description.py
+++ b/src/calibre/utils/opensearch/description.py
@ -11,9 +11,8 @@ __docformat__ = 'restructuredtext en'
 from contextlib import closing
 from lxml import etree
 from calibre import browser
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.utils.opensearch.url import URL
@ -38,7 +37,7 @@ class Description(object):
        '''
        br = browser()
        with closing(br.open(url, timeout=15)) as f:
-            doc = etree.fromstring(f.read())
+            doc = safe_xml_fromstring(f.read())
        # version 1.1 has repeating Url elements.
        self.urls = []
--- a/src/calibre/utils/xml_parse.py
+++ b/src/calibre/utils/xml_parse.py
@ -0,0 +1,19 @@
 #!/usr/bin/env python2
 # vim:fileencoding=utf-8
 # License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import absolute_import, division, print_function, unicode_literals
 from lxml import etree
 # resolve_entities is turned off as entities can cause
 # reads of local files, for example:
 # <!DOCTYPE foo [ <!ENTITY passwd SYSTEM "file:///etc/passwd" >]>
 SAFE_XML_PARSER = etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
 SAFE_XML_PARSER_NO_RECOVER = etree.XMLParser(recover=False, no_network=True, resolve_entities=False)
 fs = etree.fromstring
 def safe_xml_fromstring(string_or_bytes, recover=True):
    '''
    Parse an XML document with one of the shared module-level parsers,
    both of which are created with no_network=True and
    resolve_entities=False.

    :param string_or_bytes: the XML document to parse
    :param recover: if True (the default) SAFE_XML_PARSER (recover=True) is
        used, otherwise SAFE_XML_PARSER_NO_RECOVER (recover=False)
    :return: the result of etree.fromstring() using the chosen parser
    '''
    parser = SAFE_XML_PARSER if recover else SAFE_XML_PARSER_NO_RECOVER
    return fs(string_or_bytes, parser)
--- a/src/calibre/web/feeds/recipes/collection.py
+++ b/src/calibre/web/feeds/recipes/collection.py
@ -14,6 +14,7 @@ from lxml import etree
 from lxml.builder import ElementMaker
 from calibre import force_unicode
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.constants import numeric_version
 from calibre.utils.iso8601 import parse_iso8601
 from calibre.utils.date import now as nowf, utcnow, local_tz, isoformat, EPOCH, UNDEFINED_DATE
@ -124,7 +125,7 @@ def get_custom_recipe_collection(*args):
            import traceback
            traceback.print_exc()
            continue
-    return etree.fromstring(serialize_collection(rmap))
+    return safe_xml_fromstring(serialize_collection(rmap))
 def update_custom_recipe(id_, title, script):
@ -287,7 +288,7 @@ class SchedulerConfig(object):
        if os.access(self.conf_path, os.R_OK):
            with ExclusiveFile(self.conf_path) as f:
                try:
-                    self.root = etree.fromstring(f.read())
+                    self.root = safe_xml_fromstring(f.read())
                except:
                    print('Failed to read recipe scheduler config')
                    import traceback