From 7cd8171d3205cfe176e36a634a80b10fdb762358 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 24 Feb 2025 13:06:57 +0530 Subject: [PATCH] Edit book: Allow editing of KEPUB files Note that all Kobo markup is removed when opening the file and automatically re-inserted when saving it. --- manual/edit.rst | 8 +-- src/calibre/ebooks/oeb/polish/check/main.py | 5 +- .../ebooks/oeb/polish/check/parsing.py | 13 +++-- src/calibre/ebooks/oeb/polish/container.py | 52 +++++++++++++++---- src/calibre/ebooks/oeb/polish/kepubify.py | 8 +-- src/calibre/ebooks/oeb/polish/main.py | 2 +- .../ebooks/oeb/polish/tests/kepubify.py | 4 +- src/calibre/ebooks/oeb/polish/tts.py | 2 +- src/calibre/ebooks/oeb/polish/upgrade.py | 2 +- 9 files changed, 64 insertions(+), 32 deletions(-) diff --git a/manual/edit.rst b/manual/edit.rst index bd34b46b25..b057a0b8a0 100644 --- a/manual/edit.rst +++ b/manual/edit.rst @@ -4,10 +4,10 @@ Editing e-books ======================== calibre has an integrated e-book editor that can be used to edit books in the -EPUB and AZW3 (Kindle) formats. The editor shows you the HTML and CSS that is -used internally inside the book files, with a live preview that updates as you -make changes. It also contains various automated tools to perform common -cleanup and fixing tasks. +EPUB, KEPUB (Kobo) and AZW3 (Kindle) formats. The editor shows you the HTML and +CSS that is used internally inside the book files, with a live preview that +updates as you make changes. It also contains various automated tools to +perform common cleanup and fixing tasks. You can use this editor by right clicking on any book in calibre and selecting :guilabel:`Edit book`. diff --git a/src/calibre/ebooks/oeb/polish/check/main.py b/src/calibre/ebooks/oeb/polish/check/main.py index 2aa7ad6095..9b0add95b7 100644 --- a/src/calibre/ebooks/oeb/polish/check/main.py +++ b/src/calibre/ebooks/oeb/polish/check/main.py @@ -5,6 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' from collections import namedtuple +from functools import partial from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES from calibre.ebooks.oeb.polish.check.base import WARN, run_checkers @@ -65,8 +66,8 @@ def run_checks(container): items = raster_images if items is not None: items.append((name, mt, container.raw_data(name, decode=decode))) - if container.book_type == 'epub': - errors.extend(run_checkers(check_html_size, html_items)) + if container.MAX_HTML_FILE_SIZE: + errors.extend(run_checkers(partial(check_html_size, max_size=container.MAX_HTML_FILE_SIZE), html_items)) errors.extend(run_checkers(check_xml_parsing, xml_items)) errors.extend(run_checkers(check_xml_parsing, html_items)) errors.extend(run_checkers(check_raster_images, raster_images)) diff --git a/src/calibre/ebooks/oeb/polish/check/parsing.py b/src/calibre/ebooks/oeb/polish/check/parsing.py index 5cfeeb04e9..843168cbc2 100644 --- a/src/calibre/ebooks/oeb/polish/check/parsing.py +++ b/src/calibre/ebooks/oeb/polish/check/parsing.py @@ -154,12 +154,11 @@ class EscapedName(BaseError): class TooLarge(BaseError): level = INFO - MAX_SIZE = 260 *1024 - HELP = _('This HTML file is larger than %s. Too large HTML files can cause performance problems' - ' on some e-book readers. Consider splitting this file into smaller sections.') % human_readable(MAX_SIZE) - def __init__(self, name): + def __init__(self, name, max_size): BaseError.__init__(self, _('File too large'), name) + self.HELP = _('This HTML file is larger than {}. Too large HTML files can cause performance problems' + ' on some e-book readers. Consider splitting this file into smaller sections.').format(human_readable(max_size)) class BadEntity(BaseError): @@ -244,10 +243,10 @@ class EntitityProcessor: return b' ' * len(m.group()) -def check_html_size(name, mt, raw): +def check_html_size(name, mt, raw, max_size=0): errors = [] - if len(raw) > TooLarge.MAX_SIZE: - errors.append(TooLarge(name)) + if max_size and len(raw) > max_size: + errors.append(TooLarge(name, max_size)) return errors diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index d36476ff21..823359bbe9 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -49,7 +49,7 @@ from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook from calibre.ebooks.oeb.polish.parsing import decode_xml from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak from calibre.ebooks.oeb.polish.utils import OEB_FONTS, CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, insert_self_closing, parse_css -from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile +from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile, TemporaryDirectory from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail from calibre.utils.ipc.simple_worker import WorkerError, fork_job from calibre.utils.logging import default_log @@ -85,14 +85,14 @@ def clone_dir(src, dest): shutil.copy2(spath, dpath) -def clone_container(container, dest_dir): +def clone_container(container, dest_dir, container_class=None): ' Efficiently clone a container using hard links ' dest_dir = os.path.abspath(os.path.realpath(dest_dir)) clone_data = container.clone_data(dest_dir) - cls = type(container) - if cls is Container: - return cls(None, None, container.log, clone_data=clone_data) - return cls(None, container.log, clone_data=clone_data) + container_class = container_class or type(container) + if container_class is Container: + return container_class(None, None, container.log, clone_data=clone_data) + return container_class(None, container.log, clone_data=clone_data) def name_to_abspath(name, root): @@ -229,6 +229,7 @@ class Container(ContainerBase): # {{{ SUPPORTS_TITLEPAGES = True SUPPORTS_FILENAMES = True + MAX_HTML_FILE_SIZE = 0 @property def book_type_for_display(self): @@ -1121,6 +1122,7 @@ def walk_dir(basedir): class EpubContainer(Container): book_type = 'epub' + MAX_HTML_FILE_SIZE = 260 * 1024 @property def book_type_for_display(self): @@ -1377,6 +1379,12 @@ class EpubContainer(Container): f.write(decrypt_font_data(key, data, alg)) if outpath is None: outpath = self.pathtoepub + self.commit_epub(outpath) + for name, data in iteritems(restore_fonts): + with self.open(name, 'wb') as f: + f.write(data) + + def commit_epub(self, outpath: str) -> None: if self.is_dir: # First remove items from the source dir that do not exist any more for is_root, dirpath, fname in walk_dir(self.pathtoepub): @@ -1413,9 +1421,6 @@ class EpubContainer(Container): et = et.encode('ascii') f.write(et) zip_rebuilder(self.root, outpath) - for name, data in iteritems(restore_fonts): - with self.open(name, 'wb') as f: - f.write(data) @property def path_to_ebook(self): @@ -1428,6 +1433,26 @@ class EpubContainer(Container): # }}} +class KEPUBContainer(EpubContainer): + book_type = 'kepub' + MAX_HTML_FILE_SIZE = 512 * 1024 + + def __init__(self, pathtokepub, log, clone_data=None, tdir=None): + super().__init__(pathtokepub, log=log, clone_data=clone_data, tdir=tdir) + from calibre.ebooks.oeb.polish.kepubify import unkepubify_container + Container.commit(self, keep_parsed=True) + unkepubify_container(self) + + def commit_epub(self, outpath: str) -> None: + if self.is_dir: + return super().commit_epub(outpath) + from calibre.ebooks.oeb.polish.kepubify import Options, kepubify_container + with TemporaryDirectory() as tdir: + container = clone_container(self, tdir, container_class=EpubContainer) + kepubify_container(container, Options()) + container.commit(outpath) + + # AZW3 {{{ class InvalidMobi(InvalidBook): @@ -1590,8 +1615,13 @@ def get_container(path, log=None, tdir=None, tweak_mode=False, ebook_cls=None) - isdir = False own_tdir = not tdir if ebook_cls is None: - ebook_cls = (AZW3Container if path.rpartition('.')[-1].lower() in {'azw3', 'mobi', 'original_azw3', 'original_mobi'} and not isdir - else EpubContainer) + ext = path.rpartition('.')[-1].lower() + ebook_cls = EpubContainer + if not isdir: + if ext in {'azw3', 'mobi', 'original_azw3', 'original_mobi'}: + ebook_cls = AZW3Container + elif ext in {'kepub', 'original_kepub'}: + ebook_cls = KEPUBContainer if own_tdir: tdir = PersistentTemporaryDirectory(f'_{ebook_cls.book_type}_container') try: diff --git a/src/calibre/ebooks/oeb/polish/kepubify.py b/src/calibre/ebooks/oeb/polish/kepubify.py index b9239739aa..1d33793096 100644 --- a/src/calibre/ebooks/oeb/polish/kepubify.py +++ b/src/calibre/ebooks/oeb/polish/kepubify.py @@ -450,14 +450,16 @@ def process_path(path: str, kobo_js_href: str, metadata_lang: str, opts: Options def do_work_in_parallel(container: Container, kobo_js_name: str, opts: Options, metadata_lang: str, max_workers: int) -> None: names_that_need_work = tuple(name for name, mt in container.mime_map.items() if mt in OEB_DOCS or mt in OEB_STYLES) num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers) - paths = tuple(map(container.name_to_abspath, names_that_need_work)) + def name_to_abspath(name: str) -> str: + return container.get_file_path_for_processing(name, allow_modification=True) + if num_workers < 2: for name in names_that_need_work: - process_path(container.name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name]) + process_path(name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name]) else: with ThreadPoolExecutor(max_workers=num_workers) as executor: futures = tuple(executor.submit( - process_path, container.name_to_abspath(name), container.name_to_href(kobo_js_name, name), + process_path, name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name]) for name in names_that_need_work) for future in futures: future.result() diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py index 4c89d14a2e..8174c355bd 100644 --- a/src/calibre/ebooks/oeb/polish/main.py +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -51,7 +51,7 @@ CUSTOMIZATION = { 'remove_ncx': True, } -SUPPORTED = {'EPUB', 'AZW3'} +SUPPORTED = {'EPUB', 'AZW3', 'KEPUB'} # Help {{{ HELP = {'about': _( diff --git a/src/calibre/ebooks/oeb/polish/tests/kepubify.py b/src/calibre/ebooks/oeb/polish/tests/kepubify.py index a442ed56b5..e3c0bd1b1e 100644 --- a/src/calibre/ebooks/oeb/polish/tests/kepubify.py +++ b/src/calibre/ebooks/oeb/polish/tests/kepubify.py @@ -3,7 +3,7 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES -from calibre.ebooks.oeb.polish.container import get_container +from calibre.ebooks.oeb.polish.container import EpubContainer, get_container from calibre.ebooks.oeb.polish.kepubify import ( CSS_COMMENT_COOKIE, DUMMY_COVER_IMAGE_NAME, @@ -27,7 +27,7 @@ class KepubifyTests(BaseTest): path = get_book_for_kepubify(has_cover=has_cover, epub_version=epub_version) opts = Options()._replace(remove_widows_and_orphans=True, remove_at_page_rules=True) outpath = kepubify_path(path, opts=opts, allow_overwrite=True) - c = get_container(outpath, tweak_mode=True) + c = get_container(outpath, tweak_mode=True, ebook_cls=EpubContainer) spine_names = tuple(n for n, is_linear in c.spine_names) cname = 'titlepage.xhtml' if has_cover else f'{DUMMY_TITLE_PAGE_NAME}.xhtml' self.assertEqual(spine_names, (cname, 'index_split_000.html', 'index_split_001.html')) diff --git a/src/calibre/ebooks/oeb/polish/tts.py b/src/calibre/ebooks/oeb/polish/tts.py index b78ff24596..745e4abef8 100644 --- a/src/calibre/ebooks/oeb/polish/tts.py +++ b/src/calibre/ebooks/oeb/polish/tts.py @@ -452,7 +452,7 @@ def remove_embedded_tts(container): def embed_tts(container, report_progress=None, callback_to_download_voices=None): report_progress = report_progress or ReportProgress() - if container.book_type != 'epub': + if container.book_type not in ('epub', 'kepub'): raise UnsupportedContainerType(_('Only the EPUB format has support for embedding speech overlay audio')) if container.opf_version_parsed[0] < 3: if report_progress(_('Updating book internals'), '', 0, 0): diff --git a/src/calibre/ebooks/oeb/polish/upgrade.py b/src/calibre/ebooks/oeb/polish/upgrade.py index 06940639e7..a3b5356ab7 100644 --- a/src/calibre/ebooks/oeb/polish/upgrade.py +++ b/src/calibre/ebooks/oeb/polish/upgrade.py @@ -169,7 +169,7 @@ def epub_2_to_3(container, report, previous_nav=None, remove_ncx=True): def upgrade_book(container, report, remove_ncx=True): - if container.book_type != 'epub' or container.opf_version_parsed.major >= 3: + if container.book_type not in ('epub', 'kepub') or container.opf_version_parsed.major >= 3: report(_('No upgrade needed')) return False epub_2_to_3(container, report, remove_ncx=remove_ncx)