PDF Input: Fix background color being incorrectly set to gray when converting many PDF files.

Work around a bug in pdftohtml, which sets an incorrect background color on <body>.
This commit is contained in:
Kovid Goyal 2018-06-04 07:37:06 +05:30
parent 2c83c4747e
commit 85a04d93c5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -109,6 +109,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
raw = re.sub(br'<a\s+name=(\d+)', br'<a id="\1"', raw, flags=re.I)
raw = re.sub(br'<a id="(\d+)"', br'<a id="p\1"', raw, flags=re.I)
raw = re.sub(br'<a href="index.html#(\d+)"', br'<a href="#p\1"', raw, flags=re.I)
# pdftohtml adds link and background colors on <body>. The
# background color is incorrect, so strip all attributes from <body>.
raw = re.sub(b'<body .+?>', b'<body>', raw)
i.write(raw)