From 0b4ae4a23c0ed259f93e12896ae829ab67ef8690 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 20 Jan 2020 15:42:46 +0530 Subject: [PATCH] Clean XML of invalid chars before parsing OPF --- src/calibre/ebooks/metadata/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/metadata/utils.py b/src/calibre/ebooks/metadata/utils.py index 1eb32792ec..ceee75dd70 100644 --- a/src/calibre/ebooks/metadata/utils.py +++ b/src/calibre/ebooks/metadata/utils.py @@ -3,17 +3,17 @@ # License: GPLv3 Copyright: 2016, Kovid Goyal from __future__ import absolute_import, division, print_function, unicode_literals -from collections import namedtuple -from polyglot.builtins import map +from collections import namedtuple from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.base import OPF from calibre.ebooks.oeb.polish.utils import guess_type from calibre.spell import parse_lang_code +from calibre.utils.cleantext import clean_xml_chars from calibre.utils.localization import lang_as_iso639_1 from calibre.utils.xml_parse import safe_xml_fromstring -from polyglot.builtins import filter +from polyglot.builtins import filter, map OPFVersion = namedtuple('OPFVersion', 'major minor patch') @@ -43,7 +43,7 @@ def parse_opf(stream_or_path): raise ValueError('Empty file: '+getattr(stream, 'name', 'stream')) raw, encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True, assume_utf8=True) raw = raw[raw.find('<'):] - root = safe_xml_fromstring(raw) + root = safe_xml_fromstring(clean_xml_chars(raw)) if root is None: raise ValueError('Not an OPF file') return root