Get PDF output working again with Qt 6

For some reason, Qt WebEngine now needs local file access permissions to
load CSS and fonts, so switch to using a custom protocol, just like the
viewer does.
This commit is contained in:
Kovid Goyal 2022-03-23 23:05:32 +05:30
parent bc97485ff2
commit 6b7f975231
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 128 additions and 47 deletions

View File

@ -10,28 +10,34 @@ import os
import signal
import sys
from collections import namedtuple
from functools import lru_cache
from html5_parser import parse
from io import BytesIO
from itertools import count, repeat
from qt.core import (
QApplication, QMarginsF, QObject, QPageLayout, Qt, QTimer, QUrl, pyqtSignal, sip
QApplication, QByteArray, QMarginsF, QObject, QPageLayout, Qt, QTimer, QUrl,
pyqtSignal, sip
)
from qt.webengine import (
QWebEnginePage, QWebEngineProfile, QWebEngineUrlRequestInterceptor, QWebEngineSettings
QWebEnginePage, QWebEngineProfile, QWebEngineSettings,
QWebEngineUrlRequestInterceptor, QWebEngineUrlRequestJob,
QWebEngineUrlSchemeHandler
)
from calibre import detect_ncpus, human_readable, prepare_string_for_xml
from calibre.constants import __version__, iswindows, ismacos
from calibre.constants import (
FAKE_HOST, FAKE_PROTOCOL, __version__, ismacos, iswindows
)
from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
from calibre.ebooks.oeb.base import XHTML, XPath
from calibre.ebooks.oeb.polish.container import Container as ContainerBase
from calibre.ebooks.oeb.polish.toc import get_toc
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.pdf.image_writer import (
Image, PDFMetadata, draw_image_page, get_page_layout
)
from calibre.ebooks.pdf.render.serialize import PDFStream
from calibre.gui2 import setup_unix_signals
from calibre.utils.webengine import secure_webengine
from calibre.srv.render_book import check_for_maths
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
from calibre.utils.fonts.sfnt.errors import NoGlyphs
@ -43,7 +49,8 @@ from calibre.utils.podofo import (
dedup_type3_fonts, get_podofo, remove_unused_fonts, set_metadata_implementation
)
from calibre.utils.short_uuid import uuid4
from polyglot.builtins import iteritems
from calibre.utils.webengine import secure_webengine, send_reply
from polyglot.builtins import as_bytes, iteritems
from polyglot.urllib import urlparse
OK, KILL_SIGNAL = range(0, 2)
@ -137,6 +144,87 @@ class Container(ContainerBase):
ContainerBase.__init__(self, root_dir or os.path.dirname(opf_path), opf_path, log)
class UrlSchemeHandler(QWebEngineUrlSchemeHandler):
    '''
    Serve GET requests made with the FAKE_PROTOCOL scheme to FAKE_HOST.

    Two URL path prefixes are understood:

    ``/book/<name>``
        The file called ``name`` inside the book container.
    ``/mathjax/loader/<file>`` and ``/mathjax/data/<path>``
        A single resource file looked up via ``P()``, or a file inside the
        directory returned by ``mathjax_dir()``, respectively.

    Every other request is failed.
    '''

    def __init__(self, container, parent=None):
        QWebEngineUrlSchemeHandler.__init__(self, parent)
        self.allowed_hosts = (FAKE_HOST,)
        self.container = container

    @staticmethod
    def _is_inside(path, root):
        '''
        Return True if ``path`` is ``root`` itself or is located below it.

        A plain ``path.startswith(root)`` also accepts sibling directories
        whose names merely share a prefix with ``root`` (``/a/book-other``
        for a root of ``/a/book``), so the comparison is anchored on a
        trailing path separator.
        '''
        path = os.path.normcase(os.path.abspath(path))
        root = os.path.normcase(os.path.abspath(root))
        # os.path.join(root, '') appends a separator only when one is missing
        return path == root or path.startswith(os.path.join(root, ''))

    def requestStarted(self, rq):
        '''
        Answer the request job ``rq`` with the data it asks for, or fail it.

        Failures are reported on stderr; nothing is raised to the caller.
        '''
        if bytes(rq.requestMethod()) != b'GET':
            return self.fail_request(rq, QWebEngineUrlRequestJob.Error.RequestDenied)
        url = rq.requestUrl()
        host = url.host()
        if host not in self.allowed_hosts or url.scheme() != FAKE_PROTOCOL:
            return self.fail_request(rq)
        path = url.path()
        if path.startswith('/book/'):
            name = path[len('/book/'):]
            try:
                mime_type = self.container.mime_map.get(name) or guess_type(name)
                try:
                    with self.container.open(name) as f:
                        # Refuse to serve anything that resolves to a
                        # location outside the container root
                        if not self._is_inside(f.name, self.container.root):
                            raise FileNotFoundError('Attempt to leave sandbox')
                        data = f.read()
                except FileNotFoundError:
                    print(f'Could not find file {name} in book', file=sys.stderr)
                    rq.fail(QWebEngineUrlRequestJob.Error.UrlNotFound)
                    return
                data = as_bytes(data)
                mime_type = {
                    # Prevent warning in console about mimetype of fonts
                    'application/vnd.ms-opentype':'application/x-font-ttf',
                    'application/x-font-truetype':'application/x-font-ttf',
                    'application/font-sfnt': 'application/x-font-ttf',
                }.get(mime_type, mime_type)
                send_reply(rq, mime_type, data)
            except Exception:
                import traceback
                traceback.print_exc()
                return self.fail_request(rq, QWebEngineUrlRequestJob.Error.RequestFailed)
        elif path.startswith('/mathjax/'):
            try:
                # path is /mathjax/<base>/<rest>, anything shorter is invalid
                ignore, ignore, base, rest = path.split('/', 3)
            except ValueError:
                print(f'Could not find file {path} in mathjax', file=sys.stderr)
                rq.fail(QWebEngineUrlRequestJob.Error.UrlNotFound)
                return
            try:
                mime_type = guess_type(rest)
                if base == 'loader' and '/' not in rest and '\\' not in rest:
                    # Only a bare file name is accepted for the loader
                    data = P(rest, allow_user_override=False, data=True)
                elif base == 'data':
                    q = os.path.abspath(os.path.join(mathjax_dir(), rest))
                    if not self._is_inside(q, mathjax_dir()):
                        raise FileNotFoundError('')
                    with open(q, 'rb') as f:
                        data = f.read()
                else:
                    raise FileNotFoundError('')
                send_reply(rq, mime_type, data)
            except FileNotFoundError:
                print(f'Could not find file {path} in mathjax', file=sys.stderr)
                rq.fail(QWebEngineUrlRequestJob.Error.UrlNotFound)
                return
            except Exception:
                import traceback
                traceback.print_exc()
                return self.fail_request(rq, QWebEngineUrlRequestJob.Error.RequestFailed)
        else:
            return self.fail_request(rq)

    def fail_request(self, rq, fail_code=None):
        '''
        Fail ``rq`` with ``fail_code`` (UrlNotFound when not given) and log
        the blocked URL to stderr.
        '''
        if fail_code is None:
            fail_code = QWebEngineUrlRequestJob.Error.UrlNotFound
        rq.fail(fail_code)
        print(f"Blocking FAKE_PROTOCOL request: {rq.requestUrl().toString()} with code: {fail_code}", file=sys.stderr)
# }}}
class Renderer(QWebEnginePage):
work_done = pyqtSignal(object, object)
@ -236,7 +324,9 @@ class Renderer(QWebEnginePage):
self.settle_time = settle_time
self.page_layout = page_layout
self.setUrl(QUrl.fromLocalFile(path))
url = QUrl(f'{FAKE_PROTOCOL}://{FAKE_HOST}/')
url.setPath(path)
self.setUrl(url)
class RequestInterceptor(QWebEngineUrlRequestInterceptor):
@ -248,28 +338,22 @@ class RequestInterceptor(QWebEngineUrlRequestInterceptor):
request_info.block(True)
return
qurl = request_info.requestUrl()
if qurl.scheme() != 'file':
self.log.warn(f'Blocking URL request {qurl.toString()} as it is not for a local file')
request_info.block(True)
return
path = qurl.toLocalFile()
path = os.path.normcase(os.path.abspath(path))
if not path.startswith(self.container_root) and not path.startswith(self.resources_root):
self.log.warn(f'Blocking URL request with path: {path}')
if qurl.scheme() not in (FAKE_PROTOCOL,):
self.log.warn(f'Blocking URL request {qurl.toString()} as it is not for a resource in the book')
request_info.block(True)
return
class RenderManager(QObject):
def __init__(self, opts, log, container_root):
def __init__(self, opts, log, container):
QObject.__init__(self)
self.interceptor = RequestInterceptor(self)
self.has_maths = {}
self.interceptor.log = self.log = log
self.interceptor.container_root = os.path.normcase(os.path.abspath(container_root))
self.interceptor.resources_root = os.path.normcase(os.path.abspath(os.path.dirname(mathjax_dir())))
ans = QWebEngineProfile(QApplication.instance())
self.url_handler = UrlSchemeHandler(container, parent=ans)
ans.installUrlSchemeHandler(QByteArray(FAKE_PROTOCOL.encode('ascii')), self.url_handler)
ua = 'calibre-pdf-output ' + __version__
ans.setHttpUserAgent(ua)
s = ans.settings()
@ -378,7 +462,7 @@ def resolve_margins(margins, page_layout):
def job_for_name(container, name, margins, page_layout):
index_file = container.name_to_abspath(name)
index_file = '/book/' + name
if margins:
page_layout = QPageLayout(page_layout)
page_layout.setUnits(QPageLayout.Unit.Point)
@ -994,14 +1078,11 @@ def add_header_footer(manager, opts, pdf_doc, container, page_number_display_map
# Maths {{{
@lru_cache(maxsize=2)
def mathjax_dir():
    '''
    Return the value of the ``P()`` resource lookup for ``mathjax``, with
    user overrides disabled. The result is cached.
    '''
    location = P('mathjax', allow_user_override=False)
    return location
def path_to_url(path):
    ''' Return the string form of the local file URL for ``path`` '''
    local_url = QUrl.fromLocalFile(path)
    return local_url.toString()
def add_maths_script(container):
has_maths = {}
for name, is_linear in container.spine_names:
@ -1009,10 +1090,9 @@ def add_maths_script(container):
has_maths[name] = hm = check_for_maths(root)
if not hm:
continue
script = root.makeelement(XHTML('script'), type="text/javascript", src=path_to_url(
P('pdf-mathjax-loader.js', allow_user_override=False)))
script = root.makeelement(XHTML('script'), type="text/javascript", src=f'{FAKE_PROTOCOL}://{FAKE_HOST}/mathjax/loader/pdf-mathjax-loader.js')
script.set('async', 'async')
script.set('data-mathjax-path', path_to_url(mathjax_dir()))
script.set('data-mathjax-path', f'{FAKE_PROTOCOL}://{FAKE_HOST}/mathjax/data/')
last_tag(root).append(script)
return has_maths
# }}}
@ -1046,7 +1126,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
container.commit()
report_progress(0.1, _('Completed markup transformation'))
manager = RenderManager(opts, log, container.root)
manager = RenderManager(opts, log, container)
page_layout = get_page_layout(opts)
pdf_doc = None
anchor_locations = {}

View File

@ -7,9 +7,9 @@ import shutil
import sys
from itertools import count
from qt.core import (
QT_VERSION, QApplication, QBuffer, QByteArray, QEvent, QFontDatabase, QFontInfo,
QHBoxLayout, QIODevice, QLocale, QMimeData, QPalette, QSize, Qt, QTimer, QUrl,
QWidget, pyqtSignal, sip
QT_VERSION, QApplication, QByteArray, QEvent, QFontDatabase, QFontInfo,
QHBoxLayout, QLocale, QMimeData, QPalette, QSize, Qt, QTimer, QUrl, QWidget,
pyqtSignal, sip
)
from qt.webengine import (
QWebEnginePage, QWebEngineProfile, QWebEngineScript, QWebEngineSettings,
@ -23,7 +23,7 @@ from calibre.constants import (
)
from calibre.ebooks.metadata.book.base import field_metadata
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.gui2 import choose_images, error_dialog, safe_open_url, config
from calibre.gui2 import choose_images, config, error_dialog, safe_open_url
from calibre.gui2.viewer import link_prefix_for_location_links, performance_monitor
from calibre.gui2.viewer.config import viewer_config_dir, vprefs
from calibre.gui2.viewer.tts import TTS
@ -33,7 +33,8 @@ from calibre.utils.localization import localize_user_manual_link
from calibre.utils.serialize import json_loads
from calibre.utils.shared_file import share_open
from calibre.utils.webengine import (
Bridge, create_script, from_js, insert_scripts, secure_webengine, to_js
Bridge, create_script, from_js, insert_scripts, secure_webengine, send_reply,
to_js
)
from polyglot.builtins import as_bytes, iteritems
from polyglot.functools import lru_cache
@ -91,21 +92,6 @@ def background_image():
return ans
def send_reply(rq, mime_type, data):
    '''
    Reply to the request ``rq`` with ``data``, declared as ``mime_type``.

    Does nothing when the Qt object wrapped by ``rq`` has already been
    deleted.
    '''
    if not sip.isdeleted(rq):
        # Parent the buffer to rq so that it is automatically deleted
        # together with rq
        payload = QBuffer(parent=rq)
        payload.open(QIODevice.OpenModeFlag.WriteOnly)
        # data has to be copied into the buffer, as the Python object will
        # be garbage collected
        payload.write(data)
        payload.seek(0)
        payload.close()
        content_type = mime_type.encode('ascii')
        rq.reply(content_type, payload)
@lru_cache(maxsize=2)
def get_mathjax_dir():
    '''
    Return the value of the ``P()`` resource lookup for ``mathjax``, with
    user overrides disabled. The result is cached.
    '''
    location = P('mathjax', allow_user_override=False)
    return location

View File

@ -4,12 +4,27 @@
import json
from qt.core import QObject, pyqtSignal
from qt.core import QBuffer, QIODevice, QObject, pyqtSignal, sip
from qt.webengine import QWebEngineScript, QWebEngineSettings
from calibre.utils.rapydscript import special_title
def send_reply(rq, mime_type, data):
    '''
    Reply to the request ``rq`` with ``data``, declared as ``mime_type``.

    Does nothing when the Qt object wrapped by ``rq`` has already been
    deleted.
    '''
    if not sip.isdeleted(rq):
        # Parent the buffer to rq so that it is automatically deleted
        # together with rq
        payload = QBuffer(parent=rq)
        payload.open(QIODevice.OpenModeFlag.WriteOnly)
        # data has to be copied into the buffer, as the Python object will
        # be garbage collected
        payload.write(data)
        payload.seek(0)
        payload.close()
        content_type = mime_type.encode('ascii')
        rq.reply(content_type, payload)
def secure_webengine(view_or_page_or_settings, for_viewer=False):
s = view_or_page_or_settings.settings() if hasattr(
view_or_page_or_settings, 'settings') else view_or_page_or_settings