diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py
index 7ee9f5131f..1bb13d21a2 100644
--- a/src/calibre/ebooks/oeb/parse_utils.py
+++ b/src/calibre/ebooks/oeb/parse_utils.py
@@ -91,7 +91,11 @@ def html5_parse(data, max_nesting_depth=100):
 
     with warnings.catch_warnings():
         warnings.simplefilter('ignore')
-        data = html5lib.parse(data, treebuilder='lxml').getroot()
+        try:
+            data = html5lib.parse(data, treebuilder='lxml').getroot()
+        except ValueError:
+            from calibre.utils.cleantext import clean_xml_chars
+            data = html5lib.parse(clean_xml_chars(data), treebuilder='lxml').getroot()
 
     # Check that the asinine HTML 5 algorithm did not result in a tree with
     # insane nesting depths
diff --git a/src/calibre/ebooks/oeb/polish/tests/parsing.py b/src/calibre/ebooks/oeb/polish/tests/parsing.py
index ce5d18b494..2bc2dff96f 100644
--- a/src/calibre/ebooks/oeb/polish/tests/parsing.py
+++ b/src/calibre/ebooks/oeb/polish/tests/parsing.py
@@ -94,8 +94,10 @@ def space_characters(test, parse_function):
     root = parse_function(markup)
     err = 'form feed character not converted, parsed markup:\n' + etree.tostring(root)
     test.assertNotIn('\u000c', root.xpath('//*[local-name()="p"]')[0].text, err)
-    markup = '<html><p>\u000b\u000c</p>'
+    markup = '<html><p>a\u000b\u000c</p>'
     root = parse_function(markup)  # Should strip non XML safe control code \u000b
+    err = 'vertical tab or form feed character left in text, parsed markup:\n' + etree.tostring(root)  # rebuild: the earlier err describes the first parse, not this root
+    test.assertFalse({'\u000b', '\u000c'} & set(root.xpath('//*[local-name()="p"]')[0].text), err)  # neither control character may survive parsing
 
 def case_insensitive_element_names(test, parse_function):
     markup = '<HTML><P> </p>'