Get PDF output working again with Qt 6

For some reason, Qt WebEngine now needs local file access permissions to
load CSS and fonts, so switch to serving them via a custom protocol, just
like the viewer does.
This commit is contained in:
Kovid Goyal 2022-03-23 23:05:32 +05:30
parent bc97485ff2
commit 6b7f975231
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 128 additions and 47 deletions

View File

@ -10,28 +10,34 @@ import os
import signal import signal
import sys import sys
from collections import namedtuple from collections import namedtuple
from functools import lru_cache
from html5_parser import parse from html5_parser import parse
from io import BytesIO from io import BytesIO
from itertools import count, repeat from itertools import count, repeat
from qt.core import ( from qt.core import (
QApplication, QMarginsF, QObject, QPageLayout, Qt, QTimer, QUrl, pyqtSignal, sip QApplication, QByteArray, QMarginsF, QObject, QPageLayout, Qt, QTimer, QUrl,
pyqtSignal, sip
) )
from qt.webengine import ( from qt.webengine import (
QWebEnginePage, QWebEngineProfile, QWebEngineUrlRequestInterceptor, QWebEngineSettings QWebEnginePage, QWebEngineProfile, QWebEngineSettings,
QWebEngineUrlRequestInterceptor, QWebEngineUrlRequestJob,
QWebEngineUrlSchemeHandler
) )
from calibre import detect_ncpus, human_readable, prepare_string_for_xml from calibre import detect_ncpus, human_readable, prepare_string_for_xml
from calibre.constants import __version__, iswindows, ismacos from calibre.constants import (
FAKE_HOST, FAKE_PROTOCOL, __version__, ismacos, iswindows
)
from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
from calibre.ebooks.oeb.base import XHTML, XPath from calibre.ebooks.oeb.base import XHTML, XPath
from calibre.ebooks.oeb.polish.container import Container as ContainerBase from calibre.ebooks.oeb.polish.container import Container as ContainerBase
from calibre.ebooks.oeb.polish.toc import get_toc from calibre.ebooks.oeb.polish.toc import get_toc
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.pdf.image_writer import ( from calibre.ebooks.pdf.image_writer import (
Image, PDFMetadata, draw_image_page, get_page_layout Image, PDFMetadata, draw_image_page, get_page_layout
) )
from calibre.ebooks.pdf.render.serialize import PDFStream from calibre.ebooks.pdf.render.serialize import PDFStream
from calibre.gui2 import setup_unix_signals from calibre.gui2 import setup_unix_signals
from calibre.utils.webengine import secure_webengine
from calibre.srv.render_book import check_for_maths from calibre.srv.render_book import check_for_maths
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
from calibre.utils.fonts.sfnt.errors import NoGlyphs from calibre.utils.fonts.sfnt.errors import NoGlyphs
@ -43,7 +49,8 @@ from calibre.utils.podofo import (
dedup_type3_fonts, get_podofo, remove_unused_fonts, set_metadata_implementation dedup_type3_fonts, get_podofo, remove_unused_fonts, set_metadata_implementation
) )
from calibre.utils.short_uuid import uuid4 from calibre.utils.short_uuid import uuid4
from polyglot.builtins import iteritems from calibre.utils.webengine import secure_webengine, send_reply
from polyglot.builtins import as_bytes, iteritems
from polyglot.urllib import urlparse from polyglot.urllib import urlparse
OK, KILL_SIGNAL = range(0, 2) OK, KILL_SIGNAL = range(0, 2)
@ -137,6 +144,87 @@ class Container(ContainerBase):
ContainerBase.__init__(self, root_dir or os.path.dirname(opf_path), opf_path, log) ContainerBase.__init__(self, root_dir or os.path.dirname(opf_path), opf_path, log)
class UrlSchemeHandler(QWebEngineUrlSchemeHandler):
    """Answer FAKE_PROTOCOL requests with book files and MathJax resources.

    Three kinds of URL path are served:

    * ``/book/<name>`` -- read through ``container.open(name)``
    * ``/mathjax/loader/<file>`` -- read via ``P(file, data=True)``
    * ``/mathjax/data/<path>`` -- read from below ``mathjax_dir()``

    Any other request is failed.
    """

    def __init__(self, container, parent=None):
        """Remember *container* as the source of ``/book/`` resources."""
        QWebEngineUrlSchemeHandler.__init__(self, parent)
        self.allowed_hosts = (FAKE_HOST,)
        self.container = container

    @staticmethod
    def _is_within(path, root):
        """Return True if *path* is *root* itself or lies underneath it.

        A bare ``path.startswith(root)`` would also accept sibling
        directories that merely share a prefix (``/tmp/book-evil`` against
        ``/tmp/book``), so the comparison is anchored on a path separator.
        Both sides are passed through ``os.path.normcase`` so that case and
        slash differences do not matter where the platform ignores them.
        """
        root = os.path.normcase(os.path.abspath(root))
        path = os.path.normcase(os.path.abspath(path))
        if path == root:
            return True
        return path.startswith(root.rstrip(os.sep) + os.sep)

    def requestStarted(self, rq):
        """Serve the request job *rq*, or fail it if it is not allowed.

        Only GET requests for FAKE_PROTOCOL on an allowed host are
        considered. Missing files fail with ``UrlNotFound``; unexpected
        errors are printed and fail with ``RequestFailed``.
        """
        if bytes(rq.requestMethod()) != b'GET':
            return self.fail_request(rq, QWebEngineUrlRequestJob.Error.RequestDenied)
        url = rq.requestUrl()
        host = url.host()
        if host not in self.allowed_hosts or url.scheme() != FAKE_PROTOCOL:
            return self.fail_request(rq)
        path = url.path()
        if path.startswith('/book/'):
            name = path[len('/book/'):]
            try:
                mime_type = self.container.mime_map.get(name) or guess_type(name)
                try:
                    with self.container.open(name) as f:
                        # Refuse names that resolve outside the book's root
                        # directory (for example via '..' components).
                        if not self._is_within(f.name, self.container.root):
                            raise FileNotFoundError('Attempt to leave sandbox')
                        data = f.read()
                except FileNotFoundError:
                    print(f'Could not find file {name} in book', file=sys.stderr)
                    rq.fail(QWebEngineUrlRequestJob.Error.UrlNotFound)
                    return
                data = as_bytes(data)
                mime_type = {
                    # Prevent warning in console about mimetype of fonts
                    'application/vnd.ms-opentype':'application/x-font-ttf',
                    'application/x-font-truetype':'application/x-font-ttf',
                    'application/font-sfnt': 'application/x-font-ttf',
                }.get(mime_type, mime_type)
                send_reply(rq, mime_type, data)
            except Exception:
                import traceback
                traceback.print_exc()
                return self.fail_request(rq, QWebEngineUrlRequestJob.Error.RequestFailed)
        elif path.startswith('/mathjax/'):
            try:
                # path looks like /mathjax/<base>/<rest>; anything shorter
                # fails to unpack and is reported as not found.
                ignore, ignore, base, rest = path.split('/', 3)
            except ValueError:
                print(f'Could not find file {path} in mathjax', file=sys.stderr)
                rq.fail(QWebEngineUrlRequestJob.Error.UrlNotFound)
                return
            try:
                mime_type = guess_type(rest)
                if base == 'loader' and '/' not in rest and '\\' not in rest:
                    data = P(rest, allow_user_override=False, data=True)
                elif base == 'data':
                    q = os.path.abspath(os.path.join(mathjax_dir(), rest))
                    # Same sandbox rule as for book files: stay below the
                    # MathJax directory.
                    if not self._is_within(q, mathjax_dir()):
                        raise FileNotFoundError('')
                    with open(q, 'rb') as f:
                        data = f.read()
                else:
                    raise FileNotFoundError('')
                send_reply(rq, mime_type, data)
            except FileNotFoundError:
                print(f'Could not find file {path} in mathjax', file=sys.stderr)
                rq.fail(QWebEngineUrlRequestJob.Error.UrlNotFound)
                return
            except Exception:
                import traceback
                traceback.print_exc()
                return self.fail_request(rq, QWebEngineUrlRequestJob.Error.RequestFailed)
        else:
            return self.fail_request(rq)

    def fail_request(self, rq, fail_code=None):
        """Fail *rq* with *fail_code* (``UrlNotFound`` if None) and log it to stderr."""
        if fail_code is None:
            fail_code = QWebEngineUrlRequestJob.Error.UrlNotFound
        rq.fail(fail_code)
        print(f"Blocking FAKE_PROTOCOL request: {rq.requestUrl().toString()} with code: {fail_code}", file=sys.stderr)
# }}}
class Renderer(QWebEnginePage): class Renderer(QWebEnginePage):
work_done = pyqtSignal(object, object) work_done = pyqtSignal(object, object)
@ -236,7 +324,9 @@ class Renderer(QWebEnginePage):
self.settle_time = settle_time self.settle_time = settle_time
self.page_layout = page_layout self.page_layout = page_layout
self.setUrl(QUrl.fromLocalFile(path)) url = QUrl(f'{FAKE_PROTOCOL}://{FAKE_HOST}/')
url.setPath(path)
self.setUrl(url)
class RequestInterceptor(QWebEngineUrlRequestInterceptor): class RequestInterceptor(QWebEngineUrlRequestInterceptor):
@ -248,28 +338,22 @@ class RequestInterceptor(QWebEngineUrlRequestInterceptor):
request_info.block(True) request_info.block(True)
return return
qurl = request_info.requestUrl() qurl = request_info.requestUrl()
if qurl.scheme() != 'file': if qurl.scheme() not in (FAKE_PROTOCOL,):
self.log.warn(f'Blocking URL request {qurl.toString()} as it is not for a local file') self.log.warn(f'Blocking URL request {qurl.toString()} as it is not for a resource in the book')
request_info.block(True)
return
path = qurl.toLocalFile()
path = os.path.normcase(os.path.abspath(path))
if not path.startswith(self.container_root) and not path.startswith(self.resources_root):
self.log.warn(f'Blocking URL request with path: {path}')
request_info.block(True) request_info.block(True)
return return
class RenderManager(QObject): class RenderManager(QObject):
def __init__(self, opts, log, container_root): def __init__(self, opts, log, container):
QObject.__init__(self) QObject.__init__(self)
self.interceptor = RequestInterceptor(self) self.interceptor = RequestInterceptor(self)
self.has_maths = {} self.has_maths = {}
self.interceptor.log = self.log = log self.interceptor.log = self.log = log
self.interceptor.container_root = os.path.normcase(os.path.abspath(container_root))
self.interceptor.resources_root = os.path.normcase(os.path.abspath(os.path.dirname(mathjax_dir())))
ans = QWebEngineProfile(QApplication.instance()) ans = QWebEngineProfile(QApplication.instance())
self.url_handler = UrlSchemeHandler(container, parent=ans)
ans.installUrlSchemeHandler(QByteArray(FAKE_PROTOCOL.encode('ascii')), self.url_handler)
ua = 'calibre-pdf-output ' + __version__ ua = 'calibre-pdf-output ' + __version__
ans.setHttpUserAgent(ua) ans.setHttpUserAgent(ua)
s = ans.settings() s = ans.settings()
@ -378,7 +462,7 @@ def resolve_margins(margins, page_layout):
def job_for_name(container, name, margins, page_layout): def job_for_name(container, name, margins, page_layout):
index_file = container.name_to_abspath(name) index_file = '/book/' + name
if margins: if margins:
page_layout = QPageLayout(page_layout) page_layout = QPageLayout(page_layout)
page_layout.setUnits(QPageLayout.Unit.Point) page_layout.setUnits(QPageLayout.Unit.Point)
@ -994,14 +1078,11 @@ def add_header_footer(manager, opts, pdf_doc, container, page_number_display_map
# Maths {{{ # Maths {{{
@lru_cache(maxsize=2)
def mathjax_dir():
    """Return what ``P('mathjax', allow_user_override=False)`` resolves to.

    Presumably this is the path of the bundled MathJax directory (confirm
    against ``P``). The result is cached, so ``P`` is consulted only once.
    """
    location = P('mathjax', allow_user_override=False)
    return location
def path_to_url(path):
    """Return the string form of the local-file URL built from *path*."""
    local_url = QUrl.fromLocalFile(path)
    return local_url.toString()
def add_maths_script(container): def add_maths_script(container):
has_maths = {} has_maths = {}
for name, is_linear in container.spine_names: for name, is_linear in container.spine_names:
@ -1009,10 +1090,9 @@ def add_maths_script(container):
has_maths[name] = hm = check_for_maths(root) has_maths[name] = hm = check_for_maths(root)
if not hm: if not hm:
continue continue
script = root.makeelement(XHTML('script'), type="text/javascript", src=path_to_url( script = root.makeelement(XHTML('script'), type="text/javascript", src=f'{FAKE_PROTOCOL}://{FAKE_HOST}/mathjax/loader/pdf-mathjax-loader.js')
P('pdf-mathjax-loader.js', allow_user_override=False)))
script.set('async', 'async') script.set('async', 'async')
script.set('data-mathjax-path', path_to_url(mathjax_dir())) script.set('data-mathjax-path', f'{FAKE_PROTOCOL}://{FAKE_HOST}/mathjax/data/')
last_tag(root).append(script) last_tag(root).append(script)
return has_maths return has_maths
# }}} # }}}
@ -1046,7 +1126,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
container.commit() container.commit()
report_progress(0.1, _('Completed markup transformation')) report_progress(0.1, _('Completed markup transformation'))
manager = RenderManager(opts, log, container.root) manager = RenderManager(opts, log, container)
page_layout = get_page_layout(opts) page_layout = get_page_layout(opts)
pdf_doc = None pdf_doc = None
anchor_locations = {} anchor_locations = {}

View File

@ -7,9 +7,9 @@ import shutil
import sys import sys
from itertools import count from itertools import count
from qt.core import ( from qt.core import (
QT_VERSION, QApplication, QBuffer, QByteArray, QEvent, QFontDatabase, QFontInfo, QT_VERSION, QApplication, QByteArray, QEvent, QFontDatabase, QFontInfo,
QHBoxLayout, QIODevice, QLocale, QMimeData, QPalette, QSize, Qt, QTimer, QUrl, QHBoxLayout, QLocale, QMimeData, QPalette, QSize, Qt, QTimer, QUrl, QWidget,
QWidget, pyqtSignal, sip pyqtSignal, sip
) )
from qt.webengine import ( from qt.webengine import (
QWebEnginePage, QWebEngineProfile, QWebEngineScript, QWebEngineSettings, QWebEnginePage, QWebEngineProfile, QWebEngineScript, QWebEngineSettings,
@ -23,7 +23,7 @@ from calibre.constants import (
) )
from calibre.ebooks.metadata.book.base import field_metadata from calibre.ebooks.metadata.book.base import field_metadata
from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.gui2 import choose_images, error_dialog, safe_open_url, config from calibre.gui2 import choose_images, config, error_dialog, safe_open_url
from calibre.gui2.viewer import link_prefix_for_location_links, performance_monitor from calibre.gui2.viewer import link_prefix_for_location_links, performance_monitor
from calibre.gui2.viewer.config import viewer_config_dir, vprefs from calibre.gui2.viewer.config import viewer_config_dir, vprefs
from calibre.gui2.viewer.tts import TTS from calibre.gui2.viewer.tts import TTS
@ -33,7 +33,8 @@ from calibre.utils.localization import localize_user_manual_link
from calibre.utils.serialize import json_loads from calibre.utils.serialize import json_loads
from calibre.utils.shared_file import share_open from calibre.utils.shared_file import share_open
from calibre.utils.webengine import ( from calibre.utils.webengine import (
Bridge, create_script, from_js, insert_scripts, secure_webengine, to_js Bridge, create_script, from_js, insert_scripts, secure_webengine, send_reply,
to_js
) )
from polyglot.builtins import as_bytes, iteritems from polyglot.builtins import as_bytes, iteritems
from polyglot.functools import lru_cache from polyglot.functools import lru_cache
@ -91,21 +92,6 @@ def background_image():
return ans return ans
def send_reply(rq, mime_type, data):
    """Reply to the request job *rq* with *data*, labelled as *mime_type*.

    Nothing is done if sip reports that *rq* has already been deleted.
    *mime_type* is encoded as ASCII before being handed to ``rq.reply``.
    """
    if sip.isdeleted(rq):
        return
    # Parent the buffer to rq so it is destroyed together with the request.
    reply_buffer = QBuffer(parent=rq)
    reply_buffer.open(QIODevice.OpenModeFlag.WriteOnly)
    # Copy the payload into the buffer: the Python object holding data
    # will be garbage collected.
    reply_buffer.write(data)
    reply_buffer.seek(0)
    reply_buffer.close()
    rq.reply(mime_type.encode('ascii'), reply_buffer)
@lru_cache(maxsize=2)
def get_mathjax_dir():
    """Return what ``P('mathjax', allow_user_override=False)`` resolves to.

    Presumably this is the path of the bundled MathJax directory (confirm
    against ``P``). The result is cached after the first call.
    """
    location = P('mathjax', allow_user_override=False)
    return location

View File

@ -4,12 +4,27 @@
import json import json
from qt.core import QObject, pyqtSignal from qt.core import QBuffer, QIODevice, QObject, pyqtSignal, sip
from qt.webengine import QWebEngineScript, QWebEngineSettings from qt.webengine import QWebEngineScript, QWebEngineSettings
from calibre.utils.rapydscript import special_title from calibre.utils.rapydscript import special_title
def send_reply(rq, mime_type, data):
    """Reply to the request job *rq* with *data*, labelled as *mime_type*.

    Nothing is done if sip reports that *rq* has already been deleted.
    *mime_type* is encoded as ASCII before being handed to ``rq.reply``.
    """
    if sip.isdeleted(rq):
        return
    # Parent the buffer to rq so it is destroyed together with the request.
    reply_buffer = QBuffer(parent=rq)
    reply_buffer.open(QIODevice.OpenModeFlag.WriteOnly)
    # Copy the payload into the buffer: the Python object holding data
    # will be garbage collected.
    reply_buffer.write(data)
    reply_buffer.seek(0)
    reply_buffer.close()
    rq.reply(mime_type.encode('ascii'), reply_buffer)
def secure_webengine(view_or_page_or_settings, for_viewer=False): def secure_webengine(view_or_page_or_settings, for_viewer=False):
s = view_or_page_or_settings.settings() if hasattr( s = view_or_page_or_settings.settings() if hasattr(
view_or_page_or_settings, 'settings') else view_or_page_or_settings view_or_page_or_settings, 'settings') else view_or_page_or_settings