diff --git a/src/calibre/ebooks/oeb/polish/kepubify.py b/src/calibre/ebooks/oeb/polish/kepubify.py index bcb89a734f..1f2241abf4 100644 --- a/src/calibre/ebooks/oeb/polish/kepubify.py +++ b/src/calibre/ebooks/oeb/polish/kepubify.py @@ -12,24 +12,30 @@ # * Cover marking in the OPF # * Markup cleanup (remove various things that trip up the Kobo renderer) +import os import re +import sys +from concurrent.futures import ThreadPoolExecutor from lxml import etree from calibre.ebooks.metadata import authors_to_string -from calibre.ebooks.oeb.base import XHTML, XPath, escape_cdata +from calibre.ebooks.oeb.base import OEB_DOCS, XHTML, XPath, escape_cdata from calibre.ebooks.oeb.parse_utils import barename, merge_multiple_html_heads_and_bodies +from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.cover import find_cover_image, find_cover_image3, find_cover_page from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.polish.tts import lang_for_elem from calibre.ebooks.oeb.polish.utils import extract, insert_self_closing from calibre.spell.break_iterator import sentence_positions +from calibre.srv.render_book import Profiler, calculate_number_of_workers from calibre.utils.localization import canonicalize_lang, get_lang KOBO_CSS_CLASS = 'kobostylehacks' OUTER_DIV_ID = 'book-columns' INNER_DIV_ID = 'book-inner' KOBO_SPAN_CLASS = 'koboSpan' +DUMMY_TITLE_PAGE_NAME = 'kobo-title-page-generated-by-calibre' SKIPPED_TAGS = frozenset(( '', 'script', 'style', 'atom', 'pre', 'audio', 'video', 'svg', 'math' )) @@ -224,6 +230,16 @@ def kepubify_html_data(raw: str | bytes, metadata_lang: str = 'en'): return root +def kepubify_html_path(path: str, metadata_lang: str = 'en'): + with open(path, 'r+b') as f: + raw = f.read() + root = kepubify_html_data(raw) + raw = serialize_html(root) + f.seek(0) + f.truncate() + f.write(raw) + + def is_probably_a_title_page(root): for title in XPath('//h:title')(root): if title.text: @@ -259,7 +275,7 @@ def add_dummy_title_page(container, cover_image_name): __CONTENT__ ''' - titlepage_name = container.add_file('kobo-title-page-generated-by-calibre.html', modify_name_if_needed=True) + titlepage_name = container.add_file(f'{DUMMY_TITLE_PAGE_NAME}.html', modify_name_if_needed=True) if cover_image_name: cover_href = container.name_to_href(cover_image_name, titlepage_name) html = html.replace('__CONTENT__', f'cover') @@ -272,6 +288,15 @@ def add_dummy_title_page(container, cover_image_name): ''') with container.open(titlepage_name, 'w') as f: f.write(html) + container.apply_unique_properties(titlepage_name, 'calibre:title-page') + + +def remove_dummy_title_page(container): + for name, is_linear in container.spine_names(): + if is_linear: + if DUMMY_TITLE_PAGE_NAME in name: + container.remove_item(name) + break def first_spine_item_is_probably_cover(container) -> bool: @@ -285,10 +310,55 @@ def first_spine_item_is_probably_cover(container) -> bool: return False -def kepubify_container(container): - lang = container.mi.language +def kepubify_container(container, max_workers=0): + remove_dummy_title_page(container) + metadata_lang = container.mi.language cover_image_name = find_cover_image(container) or find_cover_image3(container) if cover_image_name: container.apply_unique_properties(cover_image_name, 'cover-image') if not find_cover_page(container) and not first_spine_item_is_probably_cover(container): add_dummy_title_page(container, cover_image_name) + names_that_need_work = tuple(name for name, mt in container.mime_map.items() if mt in OEB_DOCS) + num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers) + paths = tuple(map(container.name_to_abspath, names_that_need_work)) + if num_workers < 2: + for path in paths: + kepubify_html_path(path, metadata_lang) + else: + with ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = tuple(executor.submit(kepubify_html_path, path, metadata_lang) for path in paths) + for future in futures: + future.result() + + +def profile(): + from calibre.ptempfile import TemporaryDirectory + path = sys.argv[-1] + with TemporaryDirectory() as tdir, Profiler(): + main(path, max_workers=1) + + +def develop(): + from zipfile import ZipFile + + from calibre.ptempfile import TemporaryDirectory + path = sys.argv[-1] + with TemporaryDirectory() as tdir: + outpath = main(path, max_workers=1) + with ZipFile(outpath) as zf: + zf.extractall(tdir) + print('Extracted to:', tdir) + input('Press Enter to quit') + + +def main(path, max_workers=0): + container = get_container(path, tweak_mode=True) + kepubify_container(container, max_workers=max_workers) + base, ext = os.path.splitext(path) + outpath = base + '.kepub' + container.commit(output=outpath) + return outpath + + +if __name__ == '__main__': + main(sys.argv[-1])