Edit book: Allow editing of KEPUB files

Note that all Kobo markup is removed when opening the file and automatically re-inserted when saving it.
2025-07-09 03:04:10 -04:00 · 2025-02-24 13:06:57 +05:30 · 2025-02-24 13:06:57 +05:30 · 7cd8171d32
commit 7cd8171d32
parent 8cae0b402b
9 changed files with 64 additions and 32 deletions
--- a/manual/edit.rst
+++ b/manual/edit.rst
@ -4,10 +4,10 @@ Editing e-books
 ========================

 calibre has an integrated e-book editor that can be used to edit books in the
-EPUB and AZW3 (Kindle) formats. The editor shows you the HTML and CSS that is
-used internally inside the book files, with a live preview that updates as you
-make changes. It also contains various automated tools to perform common
-cleanup and fixing tasks.
+EPUB, KEPUB (Kobo) and AZW3 (Kindle) formats. The editor shows you the HTML and
+CSS that is used internally inside the book files, with a live preview that
+updates as you make changes. It also contains various automated tools to
+perform common cleanup and fixing tasks.

 You can use this editor by right clicking on any book in calibre and selecting
 :guilabel:`Edit book`.
--- a/src/calibre/ebooks/oeb/polish/check/main.py
+++ b/src/calibre/ebooks/oeb/polish/check/main.py
@ -5,6 +5,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

 from collections import namedtuple
+from functools import partial

 from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
 from calibre.ebooks.oeb.polish.check.base import WARN, run_checkers
@ -65,8 +66,8 @@ def run_checks(container):
            items = raster_images
        if items is not None:
            items.append((name, mt, container.raw_data(name, decode=decode)))
-    if container.book_type == 'epub':
-        errors.extend(run_checkers(check_html_size, html_items))
+    if container.MAX_HTML_FILE_SIZE:
+        errors.extend(run_checkers(partial(check_html_size, max_size=container.MAX_HTML_FILE_SIZE), html_items))
    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))
    errors.extend(run_checkers(check_raster_images, raster_images))
--- a/src/calibre/ebooks/oeb/polish/check/parsing.py
+++ b/src/calibre/ebooks/oeb/polish/check/parsing.py
@ -154,12 +154,11 @@ class EscapedName(BaseError):
 class TooLarge(BaseError):

    level = INFO
-    MAX_SIZE = 260 *1024
-    HELP = _('This HTML file is larger than %s. Too large HTML files can cause performance problems'
-             ' on some e-book readers. Consider splitting this file into smaller sections.') % human_readable(MAX_SIZE)

-    def __init__(self, name):
+    def __init__(self, name, max_size):
        BaseError.__init__(self, _('File too large'), name)
+        self.HELP = _('This HTML file is larger than {}. Too large HTML files can cause performance problems'
+                ' on some e-book readers. Consider splitting this file into smaller sections.').format(human_readable(max_size))


 class BadEntity(BaseError):
@ -244,10 +243,10 @@ class EntitityProcessor:
        return b' ' * len(m.group())


-def check_html_size(name, mt, raw):
+def check_html_size(name, mt, raw, max_size=0):
    errors = []
-    if len(raw) > TooLarge.MAX_SIZE:
-        errors.append(TooLarge(name))
+    if max_size and len(raw) > max_size:
+        errors.append(TooLarge(name, max_size))
    return errors


--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@ -49,7 +49,7 @@ from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook
 from calibre.ebooks.oeb.polish.parsing import decode_xml
 from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
 from calibre.ebooks.oeb.polish.utils import OEB_FONTS, CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, insert_self_closing, parse_css
-from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
+from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile, TemporaryDirectory
 from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail
 from calibre.utils.ipc.simple_worker import WorkerError, fork_job
 from calibre.utils.logging import default_log
@ -85,14 +85,14 @@ def clone_dir(src, dest):
                shutil.copy2(spath, dpath)


-def clone_container(container, dest_dir):
+def clone_container(container, dest_dir, container_class=None):
    ' Efficiently clone a container using hard links '
    dest_dir = os.path.abspath(os.path.realpath(dest_dir))
    clone_data = container.clone_data(dest_dir)
-    cls = type(container)
-    if cls is Container:
-        return cls(None, None, container.log, clone_data=clone_data)
-    return cls(None, container.log, clone_data=clone_data)
+    container_class = container_class or type(container)
+    if container_class is Container:
+        return container_class(None, None, container.log, clone_data=clone_data)
+    return container_class(None, container.log, clone_data=clone_data)


 def name_to_abspath(name, root):
@ -229,6 +229,7 @@ class Container(ContainerBase):  # {{{

    SUPPORTS_TITLEPAGES = True
    SUPPORTS_FILENAMES = True
+    MAX_HTML_FILE_SIZE = 0

    @property
    def book_type_for_display(self):
@ -1121,6 +1122,7 @@ def walk_dir(basedir):
 class EpubContainer(Container):

    book_type = 'epub'
+    MAX_HTML_FILE_SIZE = 260 * 1024

    @property
    def book_type_for_display(self):
@ -1377,6 +1379,12 @@ class EpubContainer(Container):
                f.write(decrypt_font_data(key, data, alg))
        if outpath is None:
            outpath = self.pathtoepub
+        self.commit_epub(outpath)
+        for name, data in iteritems(restore_fonts):
+            with self.open(name, 'wb') as f:
+                f.write(data)
+
+    def commit_epub(self, outpath: str) -> None:
        if self.is_dir:
            # First remove items from the source dir that do not exist any more
            for is_root, dirpath, fname in walk_dir(self.pathtoepub):
@ -1413,9 +1421,6 @@ class EpubContainer(Container):
                    et = et.encode('ascii')
                f.write(et)
            zip_rebuilder(self.root, outpath)
-            for name, data in iteritems(restore_fonts):
-                with self.open(name, 'wb') as f:
-                    f.write(data)

    @property
    def path_to_ebook(self):
@ -1428,6 +1433,26 @@ class EpubContainer(Container):
 # }}}


+class KEPUBContainer(EpubContainer):
+    book_type = 'kepub'
+    MAX_HTML_FILE_SIZE = 512 * 1024
+
+    def __init__(self, pathtokepub, log, clone_data=None, tdir=None):
+        super().__init__(pathtokepub, log=log, clone_data=clone_data, tdir=tdir)
+        from calibre.ebooks.oeb.polish.kepubify import unkepubify_container
+        Container.commit(self, keep_parsed=True)
+        unkepubify_container(self)
+
+    def commit_epub(self, outpath: str) -> None:
+        if self.is_dir:
+            return super().commit_epub(outpath)
+        from calibre.ebooks.oeb.polish.kepubify import Options, kepubify_container
+        with TemporaryDirectory() as tdir:
+            container = clone_container(self, tdir, container_class=EpubContainer)
+            kepubify_container(container, Options())
+            container.commit(outpath)
+
+
 # AZW3 {{{

 class InvalidMobi(InvalidBook):
@ -1590,8 +1615,13 @@ def get_container(path, log=None, tdir=None, tweak_mode=False, ebook_cls=None) -
        isdir = False
    own_tdir = not tdir
    if ebook_cls is None:
-        ebook_cls = (AZW3Container if path.rpartition('.')[-1].lower() in {'azw3', 'mobi', 'original_azw3', 'original_mobi'} and not isdir
-                else EpubContainer)
+        ext = path.rpartition('.')[-1].lower()
+        ebook_cls = EpubContainer
+        if not isdir:
+            if ext in {'azw3', 'mobi', 'original_azw3', 'original_mobi'}:
+                ebook_cls = AZW3Container
+            elif ext in {'kepub', 'original_kepub'}:
+                ebook_cls = KEPUBContainer
    if own_tdir:
        tdir = PersistentTemporaryDirectory(f'_{ebook_cls.book_type}_container')
    try:
--- a/src/calibre/ebooks/oeb/polish/kepubify.py
+++ b/src/calibre/ebooks/oeb/polish/kepubify.py
@ -450,14 +450,16 @@ def process_path(path: str, kobo_js_href: str, metadata_lang: str, opts: Options
 def do_work_in_parallel(container: Container, kobo_js_name: str, opts: Options, metadata_lang: str, max_workers: int) -> None:
    names_that_need_work = tuple(name for name, mt in container.mime_map.items() if mt in OEB_DOCS or mt in OEB_STYLES)
    num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers)
-    paths = tuple(map(container.name_to_abspath, names_that_need_work))
+    def name_to_abspath(name: str) -> str:
+        return container.get_file_path_for_processing(name, allow_modification=True)
+
    if num_workers < 2:
        for name in names_that_need_work:
-            process_path(container.name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name])
+            process_path(name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name])
    else:
        with ThreadPoolExecutor(max_workers=num_workers) as executor:
            futures = tuple(executor.submit(
-                process_path, container.name_to_abspath(name), container.name_to_href(kobo_js_name, name),
+                process_path, name_to_abspath(name), container.name_to_href(kobo_js_name, name),
                metadata_lang, opts, container.mime_map[name]) for name in names_that_need_work)
            for future in futures:
                future.result()
--- a/src/calibre/ebooks/oeb/polish/main.py
+++ b/src/calibre/ebooks/oeb/polish/main.py
@ -51,7 +51,7 @@ CUSTOMIZATION = {
    'remove_ncx': True,
 }

-SUPPORTED = {'EPUB', 'AZW3'}
+SUPPORTED = {'EPUB', 'AZW3', 'KEPUB'}

 # Help {{{
 HELP = {'about': _(
--- a/src/calibre/ebooks/oeb/polish/tests/kepubify.py
+++ b/src/calibre/ebooks/oeb/polish/tests/kepubify.py
@ -3,7 +3,7 @@


 from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
-from calibre.ebooks.oeb.polish.container import get_container
+from calibre.ebooks.oeb.polish.container import EpubContainer, get_container
 from calibre.ebooks.oeb.polish.kepubify import (
    CSS_COMMENT_COOKIE,
    DUMMY_COVER_IMAGE_NAME,
@ -27,7 +27,7 @@ class KepubifyTests(BaseTest):
            path = get_book_for_kepubify(has_cover=has_cover, epub_version=epub_version)
            opts = Options()._replace(remove_widows_and_orphans=True, remove_at_page_rules=True)
            outpath = kepubify_path(path, opts=opts, allow_overwrite=True)
-            c = get_container(outpath, tweak_mode=True)
+            c = get_container(outpath, tweak_mode=True, ebook_cls=EpubContainer)
            spine_names = tuple(n for n, is_linear in c.spine_names)
            cname = 'titlepage.xhtml' if has_cover else f'{DUMMY_TITLE_PAGE_NAME}.xhtml'
            self.assertEqual(spine_names, (cname, 'index_split_000.html', 'index_split_001.html'))
--- a/src/calibre/ebooks/oeb/polish/tts.py
+++ b/src/calibre/ebooks/oeb/polish/tts.py
@ -452,7 +452,7 @@ def remove_embedded_tts(container):

 def embed_tts(container, report_progress=None, callback_to_download_voices=None):
    report_progress = report_progress or ReportProgress()
-    if container.book_type != 'epub':
+    if container.book_type not in ('epub', 'kepub'):
        raise UnsupportedContainerType(_('Only the EPUB format has support for embedding speech overlay audio'))
    if container.opf_version_parsed[0] < 3:
        if report_progress(_('Updating book internals'), '', 0, 0):
--- a/src/calibre/ebooks/oeb/polish/upgrade.py
+++ b/src/calibre/ebooks/oeb/polish/upgrade.py
@ -169,7 +169,7 @@ def epub_2_to_3(container, report, previous_nav=None, remove_ncx=True):


 def upgrade_book(container, report, remove_ncx=True):
-    if container.book_type != 'epub' or container.opf_version_parsed.major >= 3:
+    if container.book_type not in ('epub', 'kepub') or container.opf_version_parsed.major >= 3:
        report(_('No upgrade needed'))
        return False
    epub_2_to_3(container, report, remove_ncx=remove_ncx)