E-book viewer: Speed up preparation of book on initial load on Unix platforms by using fork() to bypass the Python GIL

Have seen a 300% speedup for 400K word books.
This commit is contained in:
Kovid Goyal 2025-04-06 14:14:03 +05:30
parent 5d4f56d4fa
commit 25c7a4c871
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -24,6 +24,7 @@ from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc
from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.srv.metadata import encode_datetime from calibre.srv.metadata import encode_datetime
from calibre.utils.date import EPOCH from calibre.utils.date import EPOCH
from calibre.utils.forked_map import forked_map, forked_map_is_supported
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.utils.serialize import json_dumps, json_loads, msgpack_loads from calibre.utils.serialize import json_dumps, json_loads, msgpack_loads
from calibre.utils.short_uuid import uuid4 from calibre.utils.short_uuid import uuid4
@ -575,6 +576,12 @@ def calculate_number_of_workers(names, in_process_container, max_workers):
return num_workers return num_workers
def forked_process_book_files(container, names_that_need_work, num_workers, *common_args):
    '''
    Run process_book_files() once per name via forked_map() and yield the
    results that forked_map() produces.

    :param container: Passed unchanged to every process_book_files() call
        as the ``container`` keyword argument.
    :param names_that_need_work: The names to process. Each one is handed to
        process_book_files() as a single element tuple.
    :param num_workers: Forwarded to forked_map() as ``num_workers``.
    :param common_args: Extra positional arguments placed after the name
        tuple in every process_book_files() call.
    '''
    def process_single_name(name):
        return process_book_files((name,), *common_args, container=container)

    per_name_results = forked_map(process_single_name, names_that_need_work, num_workers=num_workers)
    yield from per_name_results
def process_exploded_book( def process_exploded_book(
book_fmt, opfpath, input_fmt, tdir, log=None, book_hash=None, save_bookmark_data=False, book_fmt, opfpath, input_fmt, tdir, log=None, book_hash=None, save_bookmark_data=False,
book_metadata=None, virtualize_resources=True, max_workers=1 book_metadata=None, virtualize_resources=True, max_workers=1
@ -645,15 +652,19 @@ def process_exploded_book(
names_that_need_work = tuple(n for n, mt in container.mime_map.items() if needs_work(mt)) names_that_need_work = tuple(n for n, mt in container.mime_map.items() if needs_work(mt))
num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers) num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers)
results = [] results = []
common_args = tdir, opfpath, virtualize_resources, book_render_data['link_uid']
if num_workers < 2: if num_workers < 2:
results.append(process_book_files(names_that_need_work, tdir, opfpath, virtualize_resources, book_render_data['link_uid'], container=container)) results.append(process_book_files(names_that_need_work, *common_args, container=container))
else: else:
with ThreadPoolExecutor(max_workers=num_workers) as executor: if forked_map_is_supported:
futures = tuple( results.extend(forked_process_book_files(container, names_that_need_work, num_workers, *common_args))
executor.submit(process_book_files, (name,), tdir, opfpath, virtualize_resources, book_render_data['link_uid'], container=container) else:
for name in names_that_need_work) with ThreadPoolExecutor(max_workers=num_workers) as executor:
for future in futures: futures = tuple(
results.append(future.result()) executor.submit(process_book_files, (name,), *common_args, container=container)
for name in names_that_need_work)
for future in futures:
results.append(future.result())
ltm = book_render_data['link_to_map'] ltm = book_render_data['link_to_map']
html_data = {} html_data = {}
@ -974,16 +985,19 @@ def profile():
) )
def develop(max_workers=1, wait_for_input=True):
    '''
    Development helper: render the book named by the last command line
    argument into a temporary directory and print where it was extracted.

    :param max_workers: Number of workers handed to render(). A value less
        than 1 means use the number of CPUs reported by os.cpu_count().
    :param wait_for_input: If True, wait for Enter to be pressed before the
        TemporaryDirectory context exits, so the output can be inspected.
    '''
    from calibre.ptempfile import TemporaryDirectory
    path = sys.argv[-1]
    if max_workers < 1:
        # os.cpu_count() returns None when the count cannot be determined,
        # so fall back to a single worker in that case.
        max_workers = os.cpu_count() or 1
    with TemporaryDirectory() as tdir:
        render(
            path, tdir, serialize_metadata=True,
            # Forward the requested worker count; it was previously hard-coded
            # to 1, which made the max_workers parameter a no-op.
            extract_annotations=True, virtualize_resources=True, max_workers=max_workers
        )
        print('Extracted to:', tdir)
        if wait_for_input:
            input('Press Enter to quit')
if __name__ == '__main__': if __name__ == '__main__':