diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py
index ff2112e620..ec1ae9366e 100644
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@@ -97,7 +97,7 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
raise DRMError()
if not as_xml:
- with open(index, 'r+b') as i:
+ with lopen(index, 'r+b') as i:
raw = i.read()
raw = flip_images(raw)
raw = '\n' + raw
@@ -107,6 +107,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
# breaks the pdf heuristics regexps, so replace them
raw = raw.replace(b'
', b'
')
raw = re.sub(br' 2: