mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Run conversion of each margin group to PDF in parallel
This commit is contained in:
parent
f0584b8fdb
commit
27c200ea8d
@ -10,9 +10,12 @@ import signal
|
|||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
from PyQt5.Qt import QApplication, QMarginsF, QPageLayout, QTimer, QUrl
|
from PyQt5.Qt import (
|
||||||
|
QApplication, QMarginsF, QObject, QPageLayout, QTimer, QUrl, pyqtSignal
|
||||||
|
)
|
||||||
from PyQt5.QtWebEngineWidgets import QWebEnginePage
|
from PyQt5.QtWebEngineWidgets import QWebEnginePage
|
||||||
|
|
||||||
|
from calibre import detect_ncpus
|
||||||
from calibre.constants import iswindows
|
from calibre.constants import iswindows
|
||||||
from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
|
from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
|
||||||
from calibre.ebooks.oeb.base import XHTML
|
from calibre.ebooks.oeb.base import XHTML
|
||||||
@ -31,7 +34,7 @@ from calibre.utils.short_uuid import uuid4
|
|||||||
from polyglot.builtins import iteritems, range
|
from polyglot.builtins import iteritems, range
|
||||||
from polyglot.urllib import urlparse
|
from polyglot.urllib import urlparse
|
||||||
|
|
||||||
OK, LOAD_FAILED, KILL_SIGNAL = range(0, 3)
|
# Codes passed to QApplication.exit() and compared against the value
# returned by run_loop() to tell a normal finish from a kill signal.
OK, KILL_SIGNAL = 0, 1
|
||||||
|
|
||||||
|
|
||||||
class Container(ContainerBase):
|
class Container(ContainerBase):
|
||||||
@ -45,9 +48,12 @@ class Container(ContainerBase):
|
|||||||
|
|
||||||
class Renderer(QWebEnginePage):
|
class Renderer(QWebEnginePage):
|
||||||
|
|
||||||
def __init__(self, opts):
|
work_done = pyqtSignal(object, object)
|
||||||
QWebEnginePage.__init__(self)
|
|
||||||
|
def __init__(self, opts, parent):
|
||||||
|
QWebEnginePage.__init__(self, parent)
|
||||||
secure_webengine(self)
|
secure_webengine(self)
|
||||||
|
self.working = False
|
||||||
self.settle_time = 0
|
self.settle_time = 0
|
||||||
s = self.settings()
|
s = self.settings()
|
||||||
s.setAttribute(s.JavascriptEnabled, True)
|
s.setAttribute(s.JavascriptEnabled, True)
|
||||||
@ -70,9 +76,50 @@ class Renderer(QWebEnginePage):
|
|||||||
s.setFontFamily(s.FixedFont, opts.pdf_mono_family)
|
s.setFontFamily(s.FixedFont, opts.pdf_mono_family)
|
||||||
|
|
||||||
self.loadFinished.connect(self.load_finished)
|
self.loadFinished.connect(self.load_finished)
|
||||||
|
|
||||||
|
def load_finished(self, ok):
    '''
    Slot connected to the loadFinished signal.

    When the load succeeded, schedule print_to_pdf() to run after
    settle_time seconds. When it failed, mark this renderer as idle and
    emit work_done with an error message in place of PDF data.
    '''
    if ok:
        delay_ms = int(1000 * self.settle_time)
        QTimer.singleShot(delay_ms, self.print_to_pdf)
        return
    self.working = False
    failed_url = self.url().toString()
    self.work_done.emit(self, 'Load of {} failed'.format(failed_url))
|
||||||
|
|
||||||
|
def print_to_pdf(self):
    '''
    Ask the page to render itself to PDF using the stored page layout;
    the resulting data is delivered to printing_done().
    '''
    on_finished = self.printing_done
    layout = self.page_layout
    self.printToPdf(on_finished, layout)
|
||||||
|
|
||||||
|
def printing_done(self, pdf_data):
    '''
    Callback handed to printToPdf(): mark this renderer as idle and
    report the PDF data, converted to bytes, through the work_done signal.
    '''
    self.working = False
    payload = bytes(pdf_data)
    self.work_done.emit(self, payload)
|
||||||
|
|
||||||
|
def convert_html_file(self, path, page_layout, settle_time=0):
    '''
    Start converting the local HTML file at path to PDF.

    Marks the renderer as working, stores the page layout and settle time
    for use by load_finished() and print_to_pdf(), and starts loading the
    file. The outcome is reported later through the work_done signal.
    '''
    self.working = True
    self.settle_time, self.page_layout = settle_time, page_layout
    file_url = QUrl.fromLocalFile(path)
    self.setUrl(file_url)
|
||||||
|
|
||||||
|
|
||||||
|
class RenderManager(QObject):
|
||||||
|
|
||||||
|
def __init__(self, opts):
    '''
    :param opts: Conversion options; handed to every Renderer that
                 create_worker() creates.
    '''
    QObject.__init__(self)
    self.opts = opts
    self.workers = []
    self.max_workers = detect_ncpus()
    # The signal blocking/restoring helpers used by run_loop() iterate over
    # this mapping unconditionally, so it has to exist on Windows as well,
    # where no unix signal handlers are installed.
    self.original_signal_handlers = {}
    if not iswindows:
        self.original_signal_handlers = setup_unix_signals(self)
|
||||||
|
|
||||||
|
def create_worker(self):
    '''
    Create a Renderer parented to this manager, route its work_done
    signal to work_done() and add it to the list of workers.
    '''
    renderer = Renderer(self.opts, self)
    renderer.work_done.connect(self.work_done)
    self.workers.append(renderer)
|
||||||
|
|
||||||
|
def signal_received(self, read_fd):
    '''
    Read up to 1024 bytes from read_fd and then stop the application
    event loop with the KILL_SIGNAL exit code. If the read fails, the
    event loop is left running.
    '''
    try:
        os.read(read_fd, 1024)
    except EnvironmentError:
        pass
    else:
        QApplication.instance().exit(KILL_SIGNAL)
|
||||||
|
|
||||||
def block_signal_handlers(self):
    '''
    Install a do-nothing handler for every signal listed in
    original_signal_handlers.
    '''
    def ignore(signum, frame):
        return None

    for signum in self.original_signal_handlers:
        signal.signal(signum, ignore)
|
||||||
@ -81,26 +128,6 @@ class Renderer(QWebEnginePage):
|
|||||||
for sig, handler in self.original_signal_handlers.items():
|
for sig, handler in self.original_signal_handlers.items():
|
||||||
signal.signal(sig, handler)
|
signal.signal(sig, handler)
|
||||||
|
|
||||||
def load_finished(self, ok):
|
|
||||||
if not ok:
|
|
||||||
QApplication.instance().exit(LOAD_FAILED)
|
|
||||||
return
|
|
||||||
QTimer.singleShot(int(1000 * self.settle_time), self.print_to_pdf)
|
|
||||||
|
|
||||||
def signal_received(self, read_fd):
|
|
||||||
try:
|
|
||||||
os.read(read_fd, 1024)
|
|
||||||
except EnvironmentError:
|
|
||||||
return
|
|
||||||
QApplication.instance().exit(KILL_SIGNAL)
|
|
||||||
|
|
||||||
def print_to_pdf(self):
|
|
||||||
self.printToPdf(self.printing_done, self.page_layout)
|
|
||||||
|
|
||||||
def printing_done(self, pdf_data):
|
|
||||||
self.pdf_data = pdf_data
|
|
||||||
QApplication.instance().exit(OK)
|
|
||||||
|
|
||||||
def run_loop(self):
|
def run_loop(self):
|
||||||
self.block_signal_handlers()
|
self.block_signal_handlers()
|
||||||
try:
|
try:
|
||||||
@ -108,19 +135,37 @@ class Renderer(QWebEnginePage):
|
|||||||
finally:
|
finally:
|
||||||
self.restore_signal_handlers()
|
self.restore_signal_handlers()
|
||||||
|
|
||||||
def convert_html_files(self, jobs, settle_time=0):
    '''
    Convert several HTML files to PDF, running up to max_workers
    renderers at the same time.

    :param jobs: Iterable of (html_file, page_layout, result_key) tuples.
    :param settle_time: Passed on to each worker's convert_html_file().
    :return: Dictionary mapping every result_key to what its worker
             reported through work_done: the PDF data as bytes, or an
             error message when loading the file failed.
    :raises SystemExit: If the event loop was stopped by a kill signal or
                        returned an unrecognised exit code.
    '''
    # Materialise once so that any iterable, not only a sized sequence,
    # can be passed in.
    self.pending = list(jobs)
    self.results = {}
    self.settle_time = settle_time
    if not self.pending:
        # With nothing to convert no worker would ever report back, so
        # nothing would stop the event loop and run_loop() would block
        # forever.
        return self.results
    while len(self.workers) < min(len(self.pending), self.max_workers):
        self.create_worker()
    # Hand out the first jobs only once the event loop is running.
    QTimer.singleShot(0, self.assign_work)
    ret = self.run_loop()
    if ret == KILL_SIGNAL:
        raise SystemExit('Kill signal received')
    if ret != OK:
        raise SystemExit('Unknown error occurred')
    return self.results
|
||||||
|
|
||||||
|
def assign_work(self):
    '''
    Give pending jobs to idle workers until either the idle workers or
    the pending jobs run out. Jobs and workers are both taken from the
    end of their lists.
    '''
    idle = [r for r in self.workers if not r.working]
    for renderer in reversed(idle):
        if not self.pending:
            break
        path, layout, key = self.pending.pop()
        renderer.result_key = key
        renderer.convert_html_file(path, layout, settle_time=self.settle_time)
|
||||||
|
|
||||||
|
def work_done(self, worker, result):
    '''
    Slot for a worker's work_done signal.

    Stores result under the worker's result_key. If jobs are still
    pending, more work is handed out; otherwise, once no worker is busy
    any more, the event loop is stopped with the OK exit code.
    '''
    self.results[worker.result_key] = result
    if self.pending:
        self.assign_work()
        return
    if any(w.working for w in self.workers):
        return
    QApplication.instance().exit(OK)
|
||||||
|
|
||||||
|
|
||||||
def update_metadata(pdf_doc, pdf_metadata):
|
def update_metadata(pdf_doc, pdf_metadata):
|
||||||
@ -131,6 +176,13 @@ def update_metadata(pdf_doc, pdf_metadata):
|
|||||||
pdf_metadata.mi.book_producer, pdf_metadata.mi.tags, xmp_packet)
|
pdf_metadata.mi.book_producer, pdf_metadata.mi.tags, xmp_packet)
|
||||||
|
|
||||||
|
|
||||||
|
def data_as_pdf_doc(data):
    '''Load raw PDF data into a new podofo PDFDoc and return the document.'''
    doc = get_podofo().PDFDoc()
    doc.load(data)
    return doc
|
||||||
|
|
||||||
|
|
||||||
def add_cover(pdf_doc, cover_data, page_layout, opts):
|
def add_cover(pdf_doc, cover_data, page_layout, opts):
|
||||||
buf = BytesIO()
|
buf = BytesIO()
|
||||||
page_size = page_layout.fullRectPoints().size()
|
page_size = page_layout.fullRectPoints().size()
|
||||||
@ -139,10 +191,7 @@ def add_cover(pdf_doc, cover_data, page_layout, opts):
|
|||||||
writer.apply_fill(color=(1, 1, 1))
|
writer.apply_fill(color=(1, 1, 1))
|
||||||
draw_image_page(writer, img, preserve_aspect_ratio=opts.preserve_cover_aspect_ratio)
|
draw_image_page(writer, img, preserve_aspect_ratio=opts.preserve_cover_aspect_ratio)
|
||||||
writer.end()
|
writer.end()
|
||||||
cover_pdf = buf.getvalue()
|
cover_pdf_doc = data_as_pdf_doc(buf.getvalue())
|
||||||
podofo = get_podofo()
|
|
||||||
cover_pdf_doc = podofo.PDFDoc()
|
|
||||||
cover_pdf_doc.load(cover_pdf)
|
|
||||||
pdf_doc.insert_existing_page(cover_pdf_doc)
|
pdf_doc.insert_existing_page(cover_pdf_doc)
|
||||||
|
|
||||||
|
|
||||||
@ -174,7 +223,7 @@ def create_margin_groups(container):
|
|||||||
return groups
|
return groups
|
||||||
|
|
||||||
|
|
||||||
def render_name(container, name, margins, renderer, page_layout):
|
def job_for_name(container, name, margins, page_layout):
|
||||||
index_file = container.name_to_abspath(name)
|
index_file = container.name_to_abspath(name)
|
||||||
if margins:
|
if margins:
|
||||||
page_layout = QPageLayout(page_layout)
|
page_layout = QPageLayout(page_layout)
|
||||||
@ -186,11 +235,7 @@ def render_name(container, name, margins, renderer, page_layout):
|
|||||||
margins.get('right', old_margins.right()),
|
margins.get('right', old_margins.right()),
|
||||||
margins.get('bottom', old_margins.bottom()))
|
margins.get('bottom', old_margins.bottom()))
|
||||||
page_layout.setMargins(new_margins)
|
page_layout.setMargins(new_margins)
|
||||||
pdf_data = renderer.convert_html_file(index_file, page_layout, settle_time=1)
|
return index_file, page_layout, name
|
||||||
podofo = get_podofo()
|
|
||||||
pdf_doc = podofo.PDFDoc()
|
|
||||||
pdf_doc.load(pdf_data)
|
|
||||||
return pdf_doc
|
|
||||||
|
|
||||||
|
|
||||||
def add_anchors_markup(root, uuid, anchors):
|
def add_anchors_markup(root, uuid, anchors):
|
||||||
@ -316,16 +361,24 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
|
|||||||
(toc)
|
(toc)
|
||||||
container.commit()
|
container.commit()
|
||||||
|
|
||||||
renderer = Renderer(opts)
|
manager = RenderManager(opts)
|
||||||
page_layout = get_page_layout(opts)
|
page_layout = get_page_layout(opts)
|
||||||
pdf_doc = None
|
pdf_doc = None
|
||||||
anchor_locations = {}
|
anchor_locations = {}
|
||||||
name_page_numbers = {}
|
name_page_numbers = {}
|
||||||
num_pages = 0
|
num_pages = 0
|
||||||
|
jobs = []
|
||||||
|
for group in margin_groups:
|
||||||
|
name, margins = group[0]
|
||||||
|
jobs.append(job_for_name(container, name, margins, page_layout))
|
||||||
|
results = manager.convert_html_files(jobs, settle_time=1)
|
||||||
for group in margin_groups:
|
for group in margin_groups:
|
||||||
name, margins = group[0]
|
name, margins = group[0]
|
||||||
name_page_numbers[name] = num_pages + 1
|
name_page_numbers[name] = num_pages + 1
|
||||||
doc = render_name(container, name, margins, renderer, page_layout)
|
data = results[name]
|
||||||
|
if not isinstance(data, bytes):
|
||||||
|
raise SystemExit(data)
|
||||||
|
doc = data_as_pdf_doc(data)
|
||||||
anchor_locations.update(get_anchor_locations(doc, num_pages + 1, links_page_uuid))
|
anchor_locations.update(get_anchor_locations(doc, num_pages + 1, links_page_uuid))
|
||||||
num_pages += doc.page_count()
|
num_pages += doc.page_count()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user