Add support for maths to PDF output

This commit is contained in:
Kovid Goyal 2019-07-30 11:32:21 +05:30
parent 0f02312390
commit e4c763d23e
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 115 additions and 19 deletions

View File

@ -0,0 +1,51 @@
/* vim:fileencoding=utf-8
*
* Copyright (C) 2019 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPLv3 license
*/
/*jshint esversion: 6 */
(function() {
"use strict";
function init_mathjax() {
var orig = window.MathJax.Ajax.fileURL.bind(window.MathJax.Ajax);
window.MathJax.Ajax.fileURL = function(mathjax_name) {
var ans = orig(mathjax_name);
if (mathjax_name.startsWith('[MathJax]/../fonts')) {
ans = ans.replace('/../fonts', '/fonts');
}
return ans;
};
}
var base = document.currentScript.getAttribute('data-mathjax-path');
if (!base.endsWith('/')) base += '/';
var script = document.createElement('script');
script.type = 'text/javascript';
script.setAttribute('async', 'async');
script.onerror = function (ev) {
console.log('Failed to load MathJax script: ' + ev.target.src);
};
script.src = base + 'MathJax.js';
window.MathJax = {AuthorInit: init_mathjax};
script.text = `
document.title = 'mathjax-load-started';
MathJax.Hub.signal.Interest(function (message) {if (String(message).match(/error/i)) {console.log(message)}});
MathJax.Hub.Config({
positionToHash: false,
showMathMenu: false,
extensions: ["tex2jax.js", "asciimath2jax.js", "mml2jax.js"],
jax: ["input/TeX", "input/MathML", "input/AsciiMath", "output/CommonHTML"],
TeX: {
extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js", "noUndefined.js"]
}
});
MathJax.Hub.Startup.onload();
MathJax.Hub.Register.StartupHook("End", function() { document.title = "mathjax-load-complete"; });
`.trim();
document.head.appendChild(script);
})();

View File

@ -16,12 +16,12 @@ from io import BytesIO
from itertools import repeat from itertools import repeat
from operator import attrgetter, itemgetter from operator import attrgetter, itemgetter
from html5_parser import parse
from PyQt5.Qt import ( from PyQt5.Qt import (
QApplication, QMarginsF, QObject, QPageLayout, QTimer, QUrl, pyqtSignal QApplication, QMarginsF, QObject, QPageLayout, QTimer, QUrl, pyqtSignal
) )
from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
from PyQt5.QtWebEngineWidgets import QWebEnginePage, QWebEngineProfile from PyQt5.QtWebEngineWidgets import QWebEnginePage, QWebEngineProfile
from html5_parser import parse
from calibre import detect_ncpus, prepare_string_for_xml from calibre import detect_ncpus, prepare_string_for_xml
from calibre.constants import __version__, iswindows from calibre.constants import __version__, iswindows
@ -35,6 +35,7 @@ from calibre.ebooks.pdf.image_writer import (
from calibre.ebooks.pdf.render.serialize import PDFStream from calibre.ebooks.pdf.render.serialize import PDFStream
from calibre.gui2 import setup_unix_signals from calibre.gui2 import setup_unix_signals
from calibre.gui2.webengine import secure_webengine from calibre.gui2.webengine import secure_webengine
from calibre.srv.render_book import check_for_maths
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
@ -57,11 +58,15 @@ def data_as_pdf_doc(data):
return ans return ans
def last_tag(root):
return tuple(root.iterchildren('*'))[-1]
def create_skeleton(container): def create_skeleton(container):
spine_name = tuple(container.spine_names)[-1][0] spine_name = tuple(container.spine_names)[-1][0]
root = container.parsed(spine_name) root = container.parsed(spine_name)
root = copy.deepcopy(root) root = copy.deepcopy(root)
body = root[-1] body = last_tag(root)
body.text = body.tail = None body.text = body.tail = None
del body[:] del body[:]
name = container.add_file(spine_name, b'', modify_name_if_needed=True) name = container.add_file(spine_name, b'', modify_name_if_needed=True)
@ -122,7 +127,11 @@ class Renderer(QWebEnginePage):
def title_changed(self, title): def title_changed(self, title):
if self.wait_for_title and title == self.wait_for_title and self.load_complete: if self.wait_for_title and title == self.wait_for_title and self.load_complete:
QTimer.singleShot(0, self.print_to_pdf) QTimer.singleShot(self.settle_time, self.print_to_pdf)
@property
def log_prefix(self):
return os.path.basename(self.url().toLocalFile()) + ':'
def load_finished(self, ok): def load_finished(self, ok):
self.load_complete = True self.load_complete = True
@ -130,10 +139,10 @@ class Renderer(QWebEnginePage):
self.working = False self.working = False
self.work_done.emit(self, 'Load of {} failed'.format(self.url().toString())) self.work_done.emit(self, 'Load of {} failed'.format(self.url().toString()))
return return
timeout = self.settle_time if self.wait_for_title and self.title() != self.wait_for_title:
if self.wait_for_title and self.title() == self.wait_for_title: self.log(self.log_prefix, 'Load finished, waiting for title to change to:', self.wait_for_title)
timeout = 0 return
QTimer.singleShot(int(1000 * timeout), self.print_to_pdf) QTimer.singleShot(int(1000 * self.settle_time), self.print_to_pdf)
def javaScriptConsoleMessage(self, level, message, linenum, source_id): def javaScriptConsoleMessage(self, level, message, linenum, source_id):
try: try:
@ -152,6 +161,7 @@ class Renderer(QWebEnginePage):
self.working = True self.working = True
self.load_complete = False self.load_complete = False
self.wait_for_title = wait_for_title self.wait_for_title = wait_for_title
self.settle_time = settle_time self.settle_time = settle_time
self.page_layout = page_layout self.page_layout = page_layout
self.setUrl(QUrl.fromLocalFile(path)) self.setUrl(QUrl.fromLocalFile(path))
@ -172,8 +182,8 @@ class RequestInterceptor(QWebEngineUrlRequestInterceptor):
return return
path = qurl.toLocalFile() path = qurl.toLocalFile()
path = os.path.normcase(os.path.abspath(path)) path = os.path.normcase(os.path.abspath(path))
if not path.startswith(self.container_root): if not path.startswith(self.container_root) and not path.startswith(self.resources_root):
self.log.warn('Blocking URL request with path: {}'.format(path)) self.log.warn('Blocking request with path: {}'.format(path))
request_info.block(True) request_info.block(True)
return return
@ -183,8 +193,10 @@ class RenderManager(QObject):
def __init__(self, opts, log, container_root): def __init__(self, opts, log, container_root):
QObject.__init__(self) QObject.__init__(self)
self.interceptor = RequestInterceptor(self) self.interceptor = RequestInterceptor(self)
self.has_maths = {}
self.interceptor.log = self.log = log self.interceptor.log = self.log = log
self.interceptor.container_root = os.path.normcase(os.path.abspath(container_root)) self.interceptor.container_root = os.path.normcase(os.path.abspath(container_root))
self.interceptor.resources_root = os.path.normcase(os.path.abspath(os.path.dirname(mathjax_dir())))
ans = QWebEngineProfile(QApplication.instance()) ans = QWebEngineProfile(QApplication.instance())
ua = 'calibre-pdf-output ' + __version__ ua = 'calibre-pdf-output ' + __version__
ans.setHttpUserAgent(ua) ans.setHttpUserAgent(ua)
@ -226,7 +238,8 @@ class RenderManager(QObject):
finally: finally:
self.restore_signal_handlers() self.restore_signal_handlers()
def convert_html_files(self, jobs, settle_time=0, wait_for_title=None): def convert_html_files(self, jobs, settle_time=0, wait_for_title=None, has_maths=None):
self.has_maths = has_maths or {}
while len(self.workers) < min(len(jobs), self.max_workers): while len(self.workers) < min(len(jobs), self.max_workers):
self.create_worker() self.create_worker()
self.pending = list(jobs) self.pending = list(jobs)
@ -235,6 +248,7 @@ class RenderManager(QObject):
self.wait_for_title = wait_for_title self.wait_for_title = wait_for_title
QTimer.singleShot(0, self.assign_work) QTimer.singleShot(0, self.assign_work)
ret = self.run_loop() ret = self.run_loop()
self.has_maths = {}
if ret == KILL_SIGNAL: if ret == KILL_SIGNAL:
raise SystemExit('Kill signal received') raise SystemExit('Kill signal received')
if ret != OK: if ret != OK:
@ -260,7 +274,12 @@ class RenderManager(QObject):
html_file, page_layout, result_key = self.pending.pop() html_file, page_layout, result_key = self.pending.pop()
w = free_workers.pop() w = free_workers.pop()
w.result_key = result_key w.result_key = result_key
w.convert_html_file(html_file, page_layout, settle_time=self.settle_time, wait_for_title=self.wait_for_title) wait_for_title = self.wait_for_title
settle_time = self.settle_time
if self.has_maths.get(result_key):
wait_for_title = 'mathjax-load-complete'
settle_time *= 2
w.convert_html_file(html_file, page_layout, settle_time=settle_time, wait_for_title=wait_for_title)
def work_done(self, worker, result): def work_done(self, worker, result):
self.results[worker.result_key] = result self.results[worker.result_key] = result
@ -339,7 +358,7 @@ def create_margin_files(container):
# Link handling {{{ # Link handling {{{
def add_anchors_markup(root, uuid, anchors): def add_anchors_markup(root, uuid, anchors):
body = root[-1] body = last_tag(root)
div = body.makeelement(XHTML('div'), id=uuid, style='page-break-before: always') div = body.makeelement(XHTML('div'), id=uuid, style='page-break-before: always')
body.append(div) body.append(div)
@ -411,7 +430,7 @@ def make_anchors_unique(container):
if key not in mapping: if key not in mapping:
new_id = mapping[key] = 'a{}'.format(count) new_id = mapping[key] = 'a{}'.format(count)
elem.set('id', new_id) elem.set('id', new_id)
body = root[-1] body = last_tag(root)
if not body.get('id'): if not body.get('id'):
count += 1 count += 1
body.set('id', 'a{}'.format(count)) body.set('id', 'a{}'.format(count))
@ -533,7 +552,7 @@ def get_page_number_display_map(render_manager, opts, num_pages, log):
def add_pagenum_toc(root, toc, opts, page_number_display_map): def add_pagenum_toc(root, toc, opts, page_number_display_map):
body = root[-1] body = last_tag(root)
indents = [] indents = []
for i in range(1, 7): for i in range(1, 7):
indents.extend((i, 1.4*i)) indents.extend((i, 1.4*i))
@ -872,7 +891,7 @@ def add_header_footer(manager, opts, pdf_doc, container, page_number_display_map
report_progress(0.8, _('Adding headers and footers')) report_progress(0.8, _('Adding headers and footers'))
name = create_skeleton(container) name = create_skeleton(container)
root = container.parsed(name) root = container.parsed(name)
body = root[-1] body = last_tag(root)
body.attrib.pop('id', None) body.attrib.pop('id', None)
body.set('style', 'margin: 0; padding: 0; border-width: 0; background-color: unset') body.set('style', 'margin: 0; padding: 0; border-width: 0; background-color: unset')
job = job_for_name(container, name, Margins(0, 0, 0, 0), page_layout) job = job_for_name(container, name, Margins(0, 0, 0, 0), page_layout)
@ -955,7 +974,7 @@ def add_header_footer(manager, opts, pdf_doc, container, page_number_display_map
template = template.replace('_TOP_LEVEL_SECTION_', prepare_string_for_xml(toplevel_toc_map[page_num - 1])) template = template.replace('_TOP_LEVEL_SECTION_', prepare_string_for_xml(toplevel_toc_map[page_num - 1]))
template = template.replace('_SECTION_', prepare_string_for_xml(page_toc_map[page_num - 1])) template = template.replace('_SECTION_', prepare_string_for_xml(page_toc_map[page_num - 1]))
troot = parse(template, namespace_elements=True) troot = parse(template, namespace_elements=True)
ans = troot[-1][0] ans = last_tag(troot)[0]
style = ans.get('style') or '' style = ans.get('style') or ''
style = ( style = (
'margin: 0; padding: 0; height: {height}pt; border-width: 0;' 'margin: 0; padding: 0; height: {height}pt; border-width: 0;'
@ -997,9 +1016,37 @@ def add_header_footer(manager, opts, pdf_doc, container, page_number_display_map
# }}} # }}}
# Maths {{{
def mathjax_dir():
return P('mathjax', allow_user_override=False)
def path_to_url(path):
return QUrl.fromLocalFile(path).toString()
def add_maths_script(container):
has_maths = {}
for name, is_linear in container.spine_names:
root = container.parsed(name)
has_maths[name] = hm = check_for_maths(root)
if not hm:
continue
script = root.makeelement(XHTML('script'), type="text/javascript", src=path_to_url(
P('pdf-mathjax-loader.js', allow_user_override=False)))
script.set('async', 'async')
script.set('data-mathjax-path', path_to_url(mathjax_dir()))
last_tag(root).append(script)
return has_maths
# }}}
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None): def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
container = Container(opf_path, log) container = Container(opf_path, log)
report_progress(0.05, _('Parsed all content for markup transformation')) report_progress(0.05, _('Parsed all content for markup transformation'))
has_maths = add_maths_script(container)
name_anchor_map = make_anchors_unique(container) name_anchor_map = make_anchors_unique(container)
margin_files = tuple(create_margin_files(container)) margin_files = tuple(create_margin_files(container))
toc = get_toc(container, verify_destinations=False) toc = get_toc(container, verify_destinations=False)
@ -1015,7 +1062,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
jobs = [] jobs = []
for margin_file in margin_files: for margin_file in margin_files:
jobs.append(job_for_name(container, margin_file.name, margin_file.margins, page_layout)) jobs.append(job_for_name(container, margin_file.name, margin_file.margins, page_layout))
results = manager.convert_html_files(jobs, settle_time=1) results = manager.convert_html_files(jobs, settle_time=1, has_maths=has_maths)
num_pages = 0 num_pages = 0
page_margins_map = [] page_margins_map = []
for margin_file in margin_files: for margin_file in margin_files:
@ -1066,8 +1113,6 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
if num_removed: if num_removed:
log('Removed', num_removed, 'duplicated Type3 glyphs') log('Removed', num_removed, 'duplicated Type3 glyphs')
# TODO: Support for mathematics
num_removed = remove_unused_fonts(pdf_doc) num_removed = remove_unused_fonts(pdf_doc)
if num_removed: if num_removed:
log('Removed', num_removed, 'unused fonts') log('Removed', num_removed, 'unused fonts')