From 6f86896da8bfdf7c0ca811643b0eb58326e0baf4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 26 Jun 2019 15:02:12 +0530
Subject: [PATCH] Dont use WebKit to render HTML for EPUB covers

Render HTML covers by loading them in a QWebEnginePage in a worker
process, printing the page to PDF and converting the first page of that
PDF to an image with pdftoppm.

---
Review notes (this section is not part of the commit message):

* render_html_data(): report_error() read `result` even when fork_job()
  raised WorkerError, in which case `result` was never bound and the error
  handler itself failed with a NameError. `result` is now initialised to an
  empty dict and read with .get().
* render_html.main(): the image written by pdftoppm is now located by its
  prefix and extension instead of assuming the name page-images-1.jpg/.png.
* The diffstat and hunk line counts below account for the two changes above;
  the index lines still carry the blob ids of the original commit.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; check the context and removed lines against the tree
  before applying. The author e-mail address is missing from the headers.

 src/calibre/ebooks/__init__.py      | 117 +++++++---------------------
 src/calibre/ebooks/metadata/epub.py |  37 +--------
 src/calibre/ebooks/metadata/pdf.py  |   9 +--
 src/calibre/ebooks/render_html.py   |  71 +++++++++++++++++
 src/calibre/gui2/webengine.py       |   2 +
 5 files changed, 110 insertions(+), 126 deletions(-)
 create mode 100644 src/calibre/ebooks/render_html.py

diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index 34b5904d8f..dd6207b6d3 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -8,8 +8,8 @@ Code for the conversion of ebook formats and the reading of metadata
 from various formats.
 '''
 
-import traceback, os, re, numbers
-from calibre import CurrentDir, prints
+import os, re, numbers, sys
+from calibre import prints
 from calibre.ebooks.chardet import xml_to_unicode
 from polyglot.builtins import unicode_type
 
@@ -41,40 +41,6 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
                    'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx']
 
 
-class HTMLRenderer(object):
-
-    def __init__(self, page, loop):
-        self.page, self.loop = page, loop
-        self.data = ''
-        self.exception = self.tb = None
-
-    def __call__(self, ok):
-        from PyQt5.Qt import QImage, QPainter, QByteArray, QBuffer
-        try:
-            if not ok:
-                raise RuntimeError('Rendering of HTML failed.')
-            de = self.page.mainFrame().documentElement()
-            pe = de.findFirst('parsererror')
-            if not pe.isNull():
-                raise ParserError(pe.toPlainText())
-            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
-            image.setDotsPerMeterX(96*(100/2.54))
-            image.setDotsPerMeterY(96*(100/2.54))
-            painter = QPainter(image)
-            self.page.mainFrame().render(painter)
-            painter.end()
-            ba = QByteArray()
-            buf = QBuffer(ba)
-            buf.open(QBuffer.WriteOnly)
-            image.save(buf, 'JPEG')
-            self.data = ba.data()
-        except Exception as e:
-            self.exception = e
-            self.traceback = traceback.format_exc()
-        finally:
-            self.loop.exit(0)
-
-
 def return_raster_image(path):
     from calibre.utils.imghdr import what
     if os.access(path, os.R_OK):
@@ -145,63 +111,42 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
             pass
 
     if data is None:
-        from calibre.gui2 import is_ok_to_use_qt
-        if is_ok_to_use_qt():
-            data = render_html_data(path_to_html, width, height)
-        else:
-            from calibre.utils.ipc.simple_worker import fork_job, WorkerError
-            try:
-                result = fork_job('calibre.ebooks',
-                                  'render_html_data',
-                                  (path_to_html, width, height),
-                                  no_output=True)
-                data = result['result']
-            except WorkerError as err:
-                prints(err.orig_tb)
-            except:
-                traceback.print_exc()
+        data = render_html_data(path_to_html, width, height)
     return data
 
 
 def render_html_data(path_to_html, width, height):
-    renderer = render_html(path_to_html, width, height)
-    return getattr(renderer, 'data', None)
+    '''
+    Render the HTML file at path_to_html in a worker process and return the
+    result as JPEG data (bytes), or None if rendering failed. Failures are
+    reported on stderr. width and height are accepted for backwards
+    compatibility but are currently ignored, the page is always rendered as A4.
+    '''
+    from calibre.ptempfile import TemporaryDirectory
+    from calibre.utils.ipc.simple_worker import fork_job, WorkerError
+    # Must be bound before report_error() runs: if fork_job() raises
+    # WorkerError, result is never assigned in the try block below
+    result = {}
 
+    def report_error(text=''):
+        prints('Failed to render', path_to_html, 'with errors:', file=sys.stderr)
+        if text:
+            prints(text, file=sys.stderr)
+        if result.get('stdout_stderr'):
+            with open(result['stdout_stderr'], 'rb') as f:
+                prints(f.read(), file=sys.stderr)
 
-def render_html(path_to_html, width=590, height=750, as_xhtml=True):
-    from PyQt5.QtWebKitWidgets import QWebPage
-    from PyQt5.Qt import QEventLoop, QPalette, Qt, QUrl, QSize
-    from calibre.gui2 import is_ok_to_use_qt, secure_web_page
-    if not is_ok_to_use_qt():
-        return None
-    path_to_html = os.path.abspath(path_to_html)
-    with CurrentDir(os.path.dirname(path_to_html)):
-        page = QWebPage()
-        settings = page.settings()
-        secure_web_page(settings)
-        pal = page.palette()
-        pal.setBrush(QPalette.Background, Qt.white)
-        page.setPalette(pal)
-        page.setViewportSize(QSize(width, height))
-        page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
-        page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
-        loop = QEventLoop()
-        renderer = HTMLRenderer(page, loop)
-        page.loadFinished.connect(renderer, type=Qt.QueuedConnection)
-        if as_xhtml:
-            page.mainFrame().setContent(open(path_to_html, 'rb').read(),
-                    'application/xhtml+xml', QUrl.fromLocalFile(path_to_html))
+    with TemporaryDirectory('-render-html') as tdir:
+        try:
+            result = fork_job('calibre.ebooks.render_html', 'main', args=(path_to_html, tdir, 'jpeg'))
+        except WorkerError as e:
+            report_error(e.orig_tb)
         else:
-            page.mainFrame().load(QUrl.fromLocalFile(path_to_html))
-        loop.exec_()
-    renderer.loop = renderer.page = None
-    page.loadFinished.disconnect()
-    del page
-    del loop
-    if isinstance(renderer.exception, ParserError) and as_xhtml:
-        return render_html(path_to_html, width=width, height=height,
-                as_xhtml=False)
-    return renderer
+            if result['result']:
+                with open(os.path.join(tdir, 'rendered.jpeg'), 'rb') as f:
+                    return f.read()
+            else:
+                report_error()
 
 
 def check_ebook_format(stream, current_guess):
diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py
index 3773e53fec..3ebac847f5 100644
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@@ -10,13 +10,11 @@ __copyright__ = '2008, Kovid Goyal '
 import io
 import os
 import posixpath
-import re
 from contextlib import closing
 
 from lxml import etree
 
-from calibre import CurrentDir, walk
-from calibre.constants import isosx
+from calibre import CurrentDir
 from calibre.ebooks.metadata.opf import (
     get_metadata as get_metadata_from_opf, set_metadata as set_metadata_opf
 )
@@ -198,39 +196,6 @@ def render_cover(cpage, zf, reader=None):
             cpage = os.path.join(tdir, cpage)
             if not os.path.exists(cpage):
                 return
-
-            if isosx:
-                # On OS X trying to render a HTML cover which uses embedded
-                # fonts more than once in the same process causes a crash in Qt
-                # so be safe and remove the fonts as well as any @font-face
-                # rules
-                for f in walk('.'):
-                    if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
-                        os.remove(f)
-                ffpat = re.compile(br'@font-face.*?{.*?}',
-                        re.DOTALL|re.IGNORECASE)
-                with lopen(cpage, 'r+b') as f:
-                    raw = f.read()
-                    f.truncate(0)
-                    f.seek(0)
-                    raw = ffpat.sub(b'', raw)
-                    f.write(raw)
-                from calibre.ebooks.chardet import xml_to_unicode
-                raw = xml_to_unicode(raw,
-                        strip_encoding_pats=True, resolve_entities=True)[0]
-                from lxml import html
-                for link in html.fromstring(raw).xpath('//link'):
-                    href = link.get('href', '')
-                    if href:
-                        path = os.path.join(os.path.dirname(cpage), href)
-                        if os.path.exists(path):
-                            with lopen(path, 'r+b') as f:
-                                raw = f.read()
-                                f.truncate(0)
-                                f.seek(0)
-                                raw = ffpat.sub(b'', raw)
-                                f.write(raw)
-
             return render_html_svg_workaround(cpage, default_log)
 
 
diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py
index bf29d62e1f..4f7486db9d 100644
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@@ -82,7 +82,7 @@ def read_info(outputdir, get_cover):
     return ans
 
 
-def page_images(pdfpath, outputdir, first=1, last=1):
+def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', prefix='page-images'):
     pdftoppm = get_tools()[1]
     outputdir = os.path.abspath(outputdir)
     args = {}
@@ -90,9 +90,10 @@ def page_images(pdfpath, outputdir, first=1, last=1):
         import win32process as w
         args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW
     try:
-        subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode_type(first),
-                               '-l', unicode_type(last), pdfpath,
-                               os.path.join(outputdir, 'page-images')], **args)
+        subprocess.check_call([
+            pdftoppm, '-cropbox', '-' + image_format, '-f', unicode_type(first),
+            '-l', unicode_type(last), pdfpath, os.path.join(outputdir, prefix)
+        ], **args)
     except subprocess.CalledProcessError as e:
         raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
 
diff --git a/src/calibre/ebooks/render_html.py b/src/calibre/ebooks/render_html.py
new file mode 100644
index 0000000000..6d745a0eb1
--- /dev/null
+++ b/src/calibre/ebooks/render_html.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2019, Kovid Goyal
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import os
+import sys
+
+from PyQt5.Qt import QApplication, QMarginsF, QPageLayout, QPageSize, Qt, QUrl
+from PyQt5.QtWebEngineWidgets import QWebEnginePage
+
+from calibre.ebooks.metadata.pdf import page_images
+from calibre.gui2 import must_use_qt
+from calibre.gui2.webengine import secure_webengine
+
+
+class Render(QWebEnginePage):
+    '''
+    Load a page and print it to rendered.pdf in the current directory. The
+    application event loop is exited with 0 on success, 1 if loading failed
+    and 2 if printing failed.
+    '''
+
+    def __init__(self):
+        QWebEnginePage.__init__(self)
+        secure_webengine(self)
+        self.loadFinished.connect(self.load_finished, type=Qt.QueuedConnection)
+        self.pdfPrintingFinished.connect(self.print_finished)
+
+    def load_finished(self, ok):
+        if ok:
+            self.start_print()
+        else:
+            QApplication.instance().exit(1)
+
+    def start_print(self):
+        margins = QMarginsF(0, 0, 0, 0)
+        page_layout = QPageLayout(QPageSize(QPageSize.A4), QPageLayout.Portrait, margins)
+        self.printToPdf('rendered.pdf', page_layout)
+
+    def print_finished(self, path, ok):
+        QApplication.instance().exit(0 if ok else 2)
+
+
+def main(path_to_html, tdir, image_format='jpeg'):
+    '''
+    Render the first page of the HTML file at path_to_html into the file
+    rendered.jpeg or rendered.png (per image_format) inside tdir. Returns True on success.
+    '''
+    if image_format not in ('jpeg', 'png'):
+        raise ValueError('Image format must be either jpeg or png')
+    must_use_qt()
+    path_to_html = os.path.abspath(path_to_html)
+    os.chdir(tdir)
+    renderer = Render()
+    renderer.load(QUrl.fromLocalFile(path_to_html))
+    ret = QApplication.instance().exec_()
+    if ret == 0:
+        page_images('rendered.pdf', image_format=image_format)
+        ext = {'jpeg': 'jpg'}.get(image_format, image_format)
+        # pdftoppm may zero-pad the page number in the output file name
+        # depending on the number of pages in the PDF, so look the file up
+        # instead of assuming it is called page-images-1
+        rendered = sorted(x for x in os.listdir('.') if x.startswith('page-images-') and x.endswith('.' + ext))
+        os.rename(rendered[0], 'rendered.' + image_format)
+    return ret == 0
+
+
+if __name__ == '__main__':
+    main(sys.argv[-1], '.')
diff --git a/src/calibre/gui2/webengine.py b/src/calibre/gui2/webengine.py
index adfcb1fb80..719e5b60a9 100644
--- a/src/calibre/gui2/webengine.py
+++ b/src/calibre/gui2/webengine.py
@@ -23,6 +23,8 @@ def secure_webengine(view_or_page_or_settings, for_viewer=False):
     if not for_viewer:
         a(s.JavascriptEnabled, False)
         s.setUnknownUrlSchemePolicy(s.DisallowUnknownUrlSchemes)
+        if hasattr(view_or_page_or_settings, 'setAudioMuted'):
+            view_or_page_or_settings.setAudioMuted(True)
     a(s.JavascriptCanOpenWindows, False)
     a(s.JavascriptCanAccessClipboard, False)
     # ensure javascript cannot read from local files