diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py
index 96fb28e923..3599c4a477 100644
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@@ -125,8 +125,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
def parse_outline(raw, output_dir):
from lxml import etree
+ from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
raw = clean_xml_chars(xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0])
- outline = etree.fromstring(raw).xpath('(//outline)[1]')
+ outline = etree.fromstring(raw, parser=RECOVER_PARSER).xpath('(//outline)[1]')
if outline:
from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
outline = outline[0]