PDF Input: Workaround for pdftohtml not always producing valid UTF-8. Fixes #1830568 [calibre failed to convert pdf to mobi](https://bugs.launchpad.net/calibre/+bug/1830568)

This commit is contained in:
Kovid Goyal 2019-05-27 08:37:53 +05:30
parent da404870ca
commit ed9b4fe49b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -95,7 +95,7 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
if not as_xml:
with lopen(index, 'r+b') as i:
raw = i.read().decode('utf-8')
raw = i.read().decode('utf-8', 'replace')
raw = flip_images(raw)
raw = raw.replace('<head', '<!-- created by calibre\'s pdftohtml -->\n <head', 1)
i.seek(0)