diff --git a/src/calibre/ebooks/oeb/polish/parsing.py b/src/calibre/ebooks/oeb/polish/parsing.py index b02a6c9fba..961ce8b5a9 100644 --- a/src/calibre/ebooks/oeb/polish/parsing.py +++ b/src/calibre/ebooks/oeb/polish/parsing.py @@ -10,7 +10,7 @@ import copy, re, warnings from functools import partial from bisect import bisect -from lxml.etree import ElementBase, XMLParser, ElementDefaultClassLookup, CommentBase +from lxml.etree import ElementBase, XMLParser, ElementDefaultClassLookup, CommentBase, fromstring, Element as LxmlElement from html5lib.constants import namespaces, tableInsertModeElements, EOF from html5lib.treebuilders._base import TreeBuilder as BaseTreeBuilder @@ -18,7 +18,7 @@ from html5lib.ihatexml import InfosetFilter, DataLossWarning from html5lib.html5parser import HTMLParser from calibre import xml_replace_entities -from calibre.ebooks.chardet import xml_to_unicode +from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations from calibre.ebooks.oeb.parse_utils import fix_self_closing_cdata_tags from calibre.utils.cleantext import clean_xml_chars @@ -560,12 +560,14 @@ if len("\U0010FFFF") == 1: # UCS4 build else: replace_chars = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?