From ed9b4fe49bc2a8d8769321c47c5e2465d820c0c1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 27 May 2019 08:37:53 +0530
Subject: [PATCH] PDF Input: Workaround for pdftohtml not always producing
 valid UTF-8. Fixes #1830568 [calibre failed to convert pdf to
 mobi](https://bugs.launchpad.net/calibre/+bug/1830568)

---
 src/calibre/ebooks/pdf/pdftohtml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py
index 1cbf66daef..ac4d523da2 100644
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@@ -95,7 +95,7 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
 
         if not as_xml:
             with lopen(index, 'r+b') as i:
-                raw = i.read().decode('utf-8')
+                raw = i.read().decode('utf-8', 'replace')
                 raw = flip_images(raw)
                 raw = raw.replace('<head', '<!-- created by calibre\'s pdftohtml -->\n  <head', 1)
                 i.seek(0)