Don't use WebKit to render HTML for EPUB covers

This commit is contained in:
Kovid Goyal 2019-06-26 15:02:12 +05:30
parent 27798beaf6
commit 6f86896da8
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 88 additions and 126 deletions

View File

@ -8,8 +8,8 @@ Code for the conversion of ebook formats and the reading of metadata
from various formats.
'''
import traceback, os, re, numbers
from calibre import CurrentDir, prints
import os, re, numbers, sys
from calibre import prints
from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import unicode_type
@ -41,40 +41,6 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx']
class HTMLRenderer(object):
    '''
    Callable connected to a page's ``loadFinished`` signal that paints the
    loaded page into an image and stores it as JPEG data.

    After the call has run:

    * ``data`` holds the JPEG bytes (it stays ``''`` on failure),
    * ``exception`` holds the exception that aborted rendering, or None,
    * ``traceback`` (also available as ``tb``) holds the formatted
      traceback of that exception, or None.

    The event loop passed to the constructor is always exited with code 0
    once the call finishes, whether or not rendering succeeded.
    '''

    def __init__(self, page, loop):
        self.page, self.loop = page, loop
        self.data = ''
        # ``tb`` was the name originally initialised here while __call__
        # stored into ``traceback``; initialise both so that either can be
        # read safely after a successful render.
        self.exception = self.tb = self.traceback = None

    def __call__(self, ok):
        '''
        Render the page if ``ok`` is true, otherwise record a RuntimeError.

        Errors are never propagated to the caller: they are stored on the
        instance so that whoever is blocked on the event loop can inspect
        them after the loop exits.
        '''
        try:
            if not ok:
                raise RuntimeError('Rendering of HTML failed.')
            # Imported inside the try block so that an import failure is
            # recorded like any other error and the loop is still exited.
            from PyQt5.Qt import QImage, QPainter, QByteArray, QBuffer
            de = self.page.mainFrame().documentElement()
            pe = de.findFirst('parsererror')
            if not pe.isNull():
                raise ParserError(pe.toPlainText())
            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
            # 96 dots per inch expressed in dots per metre. Qt takes an
            # integer here, so truncate explicitly instead of relying on
            # the binding to convert the float.
            dots_per_meter = int(96*(100/2.54))
            image.setDotsPerMeterX(dots_per_meter)
            image.setDotsPerMeterY(dots_per_meter)
            painter = QPainter(image)
            try:
                self.page.mainFrame().render(painter)
            finally:
                # Always release the paint device, even if render() fails
                painter.end()
            ba = QByteArray()
            buf = QBuffer(ba)
            buf.open(QBuffer.WriteOnly)
            image.save(buf, 'JPEG')
            self.data = ba.data()
        except Exception as e:
            self.exception = e
            self.traceback = self.tb = traceback.format_exc()
        finally:
            self.loop.exit(0)
def return_raster_image(path):
from calibre.utils.imghdr import what
if os.access(path, os.R_OK):
@ -145,63 +111,33 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
pass
if data is None:
from calibre.gui2 import is_ok_to_use_qt
if is_ok_to_use_qt():
data = render_html_data(path_to_html, width, height)
else:
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
try:
result = fork_job('calibre.ebooks',
'render_html_data',
(path_to_html, width, height),
no_output=True)
data = result['result']
except WorkerError as err:
prints(err.orig_tb)
except:
traceback.print_exc()
return data
def render_html_data(path_to_html, width, height):
renderer = render_html(path_to_html, width, height)
return getattr(renderer, 'data', None)
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
# Pre-bind ``result`` in the enclosing scope: if fork_job() raises
# WorkerError the assignment in the try block below never happens, and
# report_error() would otherwise die with a NameError while trying to
# report the original failure.
result = {}

def report_error(text=''):
    '''
    Print a rendering failure for ``path_to_html`` to stderr.

    :param text: Optional extra detail (such as a worker traceback) that
                 is printed after the headline.

    If the worker result names a ``stdout_stderr`` file, the contents of
    that file are printed as well.
    '''
    prints('Failed to render', path_to_html, 'with errors:', file=sys.stderr)
    if text:
        prints(text, file=sys.stderr)
    # ``result`` is still the empty placeholder when the worker failed
    # before returning anything, so look the key up defensively.
    output_path = result.get('stdout_stderr')
    if output_path:
        with open(output_path, 'rb') as f:
            prints(f.read(), file=sys.stderr)
def render_html(path_to_html, width=590, height=750, as_xhtml=True):
from PyQt5.QtWebKitWidgets import QWebPage
from PyQt5.Qt import QEventLoop, QPalette, Qt, QUrl, QSize
from calibre.gui2 import is_ok_to_use_qt, secure_web_page
if not is_ok_to_use_qt():
return None
path_to_html = os.path.abspath(path_to_html)
with CurrentDir(os.path.dirname(path_to_html)):
page = QWebPage()
settings = page.settings()
secure_web_page(settings)
pal = page.palette()
pal.setBrush(QPalette.Background, Qt.white)
page.setPalette(pal)
page.setViewportSize(QSize(width, height))
page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
loop = QEventLoop()
renderer = HTMLRenderer(page, loop)
page.loadFinished.connect(renderer, type=Qt.QueuedConnection)
if as_xhtml:
page.mainFrame().setContent(open(path_to_html, 'rb').read(),
'application/xhtml+xml', QUrl.fromLocalFile(path_to_html))
with TemporaryDirectory('-render-html') as tdir:
try:
result = fork_job('calibre.ebooks.render_html', 'main', args=(path_to_html, tdir, 'jpeg'))
except WorkerError as e:
report_error(e.orig_tb)
else:
page.mainFrame().load(QUrl.fromLocalFile(path_to_html))
loop.exec_()
renderer.loop = renderer.page = None
page.loadFinished.disconnect()
del page
del loop
if isinstance(renderer.exception, ParserError) and as_xhtml:
return render_html(path_to_html, width=width, height=height,
as_xhtml=False)
return renderer
if result['result']:
with open(os.path.join(tdir, 'rendered.jpeg'), 'rb') as f:
return f.read()
else:
report_error()
def check_ebook_format(stream, current_guess):

View File

@ -10,13 +10,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import io
import os
import posixpath
import re
from contextlib import closing
from lxml import etree
from calibre import CurrentDir, walk
from calibre.constants import isosx
from calibre import CurrentDir
from calibre.ebooks.metadata.opf import (
get_metadata as get_metadata_from_opf, set_metadata as set_metadata_opf
)
@ -198,39 +196,6 @@ def render_cover(cpage, zf, reader=None):
cpage = os.path.join(tdir, cpage)
if not os.path.exists(cpage):
return
if isosx:
# On OS X trying to render a HTML cover which uses embedded
# fonts more than once in the same process causes a crash in Qt
# so be safe and remove the fonts as well as any @font-face
# rules
for f in walk('.'):
if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
os.remove(f)
ffpat = re.compile(br'@font-face.*?{.*?}',
re.DOTALL|re.IGNORECASE)
with lopen(cpage, 'r+b') as f:
raw = f.read()
f.truncate(0)
f.seek(0)
raw = ffpat.sub(b'', raw)
f.write(raw)
from calibre.ebooks.chardet import xml_to_unicode
raw = xml_to_unicode(raw,
strip_encoding_pats=True, resolve_entities=True)[0]
from lxml import html
for link in html.fromstring(raw).xpath('//link'):
href = link.get('href', '')
if href:
path = os.path.join(os.path.dirname(cpage), href)
if os.path.exists(path):
with lopen(path, 'r+b') as f:
raw = f.read()
f.truncate(0)
f.seek(0)
raw = ffpat.sub(b'', raw)
f.write(raw)
return render_html_svg_workaround(cpage, default_log)

View File

@ -82,7 +82,7 @@ def read_info(outputdir, get_cover):
return ans
def page_images(pdfpath, outputdir, first=1, last=1):
def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', prefix='page-images'):
pdftoppm = get_tools()[1]
outputdir = os.path.abspath(outputdir)
args = {}
@ -90,9 +90,10 @@ def page_images(pdfpath, outputdir, first=1, last=1):
import win32process as w
args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW
try:
subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode_type(first),
'-l', unicode_type(last), pdfpath,
os.path.join(outputdir, 'page-images')], **args)
subprocess.check_call([
pdftoppm, '-cropbox', '-' + image_format, '-f', unicode_type(first),
'-l', unicode_type(last), pdfpath, os.path.join(outputdir, prefix)
], **args)
except subprocess.CalledProcessError as e:
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import sys
from PyQt5.Qt import QApplication, QMarginsF, QPageLayout, QPageSize, Qt, QUrl
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from calibre.ebooks.metadata.pdf import page_images
from calibre.gui2 import must_use_qt
from calibre.gui2.webengine import secure_webengine
class Render(QWebEnginePage):
    '''
    Web page that prints itself to ``rendered.pdf`` once loading finishes
    and then quits the application event loop.

    The application exit code reports the outcome: 0 when the PDF was
    written, 1 when the page failed to load and 2 when printing failed.
    '''

    def __init__(self):
        QWebEnginePage.__init__(self)
        secure_webengine(self)
        self.pdfPrintingFinished.connect(self.print_finished)
        self.loadFinished.connect(self.load_finished, type=Qt.QueuedConnection)

    def load_finished(self, ok):
        # Loading failed: stop the event loop straight away with code 1
        if not ok:
            QApplication.instance().exit(1)
            return
        self.start_print()

    def start_print(self):
        # A4 portrait page with no margins
        page_size = QPageSize(QPageSize.A4)
        no_margins = QMarginsF(0, 0, 0, 0)
        layout = QPageLayout(page_size, QPageLayout.Portrait, no_margins)
        self.printToPdf('rendered.pdf', layout)

    def print_finished(self, path, ok):
        exit_code = 0 if ok else 2
        QApplication.instance().exit(exit_code)
def main(path_to_html, tdir, image_format='jpeg'):
    '''
    Render the HTML file at ``path_to_html`` to an image inside ``tdir``.

    The page is first printed to ``rendered.pdf`` and the first page of
    that PDF is then rasterised and renamed to ``rendered.<image_format>``.
    The process working directory is changed to ``tdir`` as a side effect.

    :param path_to_html: Path to the HTML file; made absolute before the
                         working directory changes.
    :param tdir: Directory in which all output files are created.
    :param image_format: Either ``'jpeg'`` or ``'png'``.
    :return: True if the page was loaded and printed successfully.
    :raises ValueError: If ``image_format`` is not supported.
    '''
    import glob

    if image_format not in ('jpeg', 'png'):
        raise ValueError('Image format must be either jpeg or png')
    must_use_qt()
    path_to_html = os.path.abspath(path_to_html)
    os.chdir(tdir)
    renderer = Render()
    renderer.load(QUrl.fromLocalFile(path_to_html))
    ret = QApplication.instance().exec_()
    if ret == 0:
        page_images('rendered.pdf', image_format=image_format)
        ext = {'jpeg': 'jpg'}.get(image_format, image_format)
        # pdftoppm zero-pads the page number to the width of the PDF's
        # total page count, so the first page is "page-images-01" (not
        # "page-images-1") for a PDF with ten or more pages. Find whatever
        # name was produced rather than assuming the unpadded one.
        produced = sorted(glob.glob('page-images-*.' + ext))
        # Fall back to the unpadded name so that a missing file still
        # fails in os.rename() exactly as before.
        first_page = produced[0] if produced else 'page-images-1.' + ext
        os.rename(first_page, 'rendered.' + image_format)
    return ret == 0


if __name__ == '__main__':
    main(sys.argv[-1], '.')

View File

@ -23,6 +23,8 @@ def secure_webengine(view_or_page_or_settings, for_viewer=False):
if not for_viewer:
a(s.JavascriptEnabled, False)
s.setUnknownUrlSchemePolicy(s.DisallowUnknownUrlSchemes)
if hasattr(view_or_page_or_settings, 'setAudioMuted'):
view_or_page_or_settings.setAudioMuted(True)
a(s.JavascriptCanOpenWindows, False)
a(s.JavascriptCanAccessClipboard, False)
# ensure javascript cannot read from local files