PDF Input: Workaround for pdftohtml not always producing valid UTF-8. Fixes #1830568 [calibre failed to convert pdf to mobi](https://bugs.launchpad.net/calibre/+bug/1830568)

2025-06-23 15:30:45 -04:00 · 2019-05-27 08:37:53 +05:30 · 2019-05-27 08:37:53 +05:30 · ed9b4fe49b
commit ed9b4fe49b
parent da404870ca
1 changed files with 1 additions and 1 deletions
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@@ -95,7 +95,7 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):

        if not as_xml:
            with lopen(index, 'r+b') as i:
-                raw = i.read().decode('utf-8')
+                raw = i.read().decode('utf-8', 'replace')
                raw = flip_images(raw)
                raw = raw.replace('<head', '<!-- created by calibre\'s pdftohtml -->\n  <head', 1)
                i.seek(0)