Edit book: Allow editing of KEPUB files

Note that all Kobo markup is removed when opening the file and
automatically re-inserted when saving it.
This commit is contained in:
Kovid Goyal 2025-02-24 13:06:57 +05:30
parent 8cae0b402b
commit 7cd8171d32
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
9 changed files with 64 additions and 32 deletions

View File

@ -4,10 +4,10 @@ Editing e-books
========================
calibre has an integrated e-book editor that can be used to edit books in the
EPUB and AZW3 (Kindle) formats. The editor shows you the HTML and CSS that is
used internally inside the book files, with a live preview that updates as you
make changes. It also contains various automated tools to perform common
cleanup and fixing tasks.
EPUB, KEPUB (Kobo) and AZW3 (Kindle) formats. The editor shows you the HTML and
CSS that is used internally inside the book files, with a live preview that
updates as you make changes. It also contains various automated tools to
perform common cleanup and fixing tasks.
You can use this editor by right clicking on any book in calibre and selecting
:guilabel:`Edit book`.

View File

@ -5,6 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import namedtuple
from functools import partial
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.check.base import WARN, run_checkers
@ -65,8 +66,8 @@ def run_checks(container):
items = raster_images
if items is not None:
items.append((name, mt, container.raw_data(name, decode=decode)))
if container.book_type == 'epub':
errors.extend(run_checkers(check_html_size, html_items))
if container.MAX_HTML_FILE_SIZE:
errors.extend(run_checkers(partial(check_html_size, max_size=container.MAX_HTML_FILE_SIZE), html_items))
errors.extend(run_checkers(check_xml_parsing, xml_items))
errors.extend(run_checkers(check_xml_parsing, html_items))
errors.extend(run_checkers(check_raster_images, raster_images))

View File

@ -154,12 +154,11 @@ class EscapedName(BaseError):
class TooLarge(BaseError):
level = INFO
MAX_SIZE = 260 *1024
HELP = _('This HTML file is larger than %s. Too large HTML files can cause performance problems'
' on some e-book readers. Consider splitting this file into smaller sections.') % human_readable(MAX_SIZE)
def __init__(self, name):
def __init__(self, name, max_size):
BaseError.__init__(self, _('File too large'), name)
self.HELP = _('This HTML file is larger than {}. Too large HTML files can cause performance problems'
' on some e-book readers. Consider splitting this file into smaller sections.').format(human_readable(max_size))
class BadEntity(BaseError):
@ -244,10 +243,10 @@ class EntitityProcessor:
return b' ' * len(m.group())
def check_html_size(name, mt, raw):
def check_html_size(name, mt, raw, max_size=0):
errors = []
if len(raw) > TooLarge.MAX_SIZE:
errors.append(TooLarge(name))
if max_size and len(raw) > max_size:
errors.append(TooLarge(name, max_size))
return errors

View File

@ -49,7 +49,7 @@ from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook
from calibre.ebooks.oeb.polish.parsing import decode_xml
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import OEB_FONTS, CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, insert_self_closing, parse_css
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile, TemporaryDirectory
from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail
from calibre.utils.ipc.simple_worker import WorkerError, fork_job
from calibre.utils.logging import default_log
@ -85,14 +85,14 @@ def clone_dir(src, dest):
shutil.copy2(spath, dpath)
def clone_container(container, dest_dir):
def clone_container(container, dest_dir, container_class=None):
' Efficiently clone a container using hard links '
dest_dir = os.path.abspath(os.path.realpath(dest_dir))
clone_data = container.clone_data(dest_dir)
cls = type(container)
if cls is Container:
return cls(None, None, container.log, clone_data=clone_data)
return cls(None, container.log, clone_data=clone_data)
container_class = container_class or type(container)
if container_class is Container:
return container_class(None, None, container.log, clone_data=clone_data)
return container_class(None, container.log, clone_data=clone_data)
def name_to_abspath(name, root):
@ -229,6 +229,7 @@ class Container(ContainerBase): # {{{
SUPPORTS_TITLEPAGES = True
SUPPORTS_FILENAMES = True
MAX_HTML_FILE_SIZE = 0
@property
def book_type_for_display(self):
@ -1121,6 +1122,7 @@ def walk_dir(basedir):
class EpubContainer(Container):
book_type = 'epub'
MAX_HTML_FILE_SIZE = 260 * 1024
@property
def book_type_for_display(self):
@ -1377,6 +1379,12 @@ class EpubContainer(Container):
f.write(decrypt_font_data(key, data, alg))
if outpath is None:
outpath = self.pathtoepub
self.commit_epub(outpath)
for name, data in iteritems(restore_fonts):
with self.open(name, 'wb') as f:
f.write(data)
def commit_epub(self, outpath: str) -> None:
if self.is_dir:
# First remove items from the source dir that do not exist any more
for is_root, dirpath, fname in walk_dir(self.pathtoepub):
@ -1413,9 +1421,6 @@ class EpubContainer(Container):
et = et.encode('ascii')
f.write(et)
zip_rebuilder(self.root, outpath)
for name, data in iteritems(restore_fonts):
with self.open(name, 'wb') as f:
f.write(data)
@property
def path_to_ebook(self):
@ -1428,6 +1433,26 @@ class EpubContainer(Container):
# }}}
class KEPUBContainer(EpubContainer):
    '''
    Container for Kobo KEPUB files.

    Kobo markup is stripped when a book is opened and re-inserted when the
    book is saved to a file, so the contents held by this container are
    meant to be edited like those of a regular EPUB.
    '''

    book_type = 'kepub'
    # Size limit reported by the "File too large" check for HTML files
    MAX_HTML_FILE_SIZE = 512 * 1024

    def __init__(self, pathtokepub, log, clone_data=None, tdir=None):
        '''
        Open the KEPUB at `pathtokepub` and remove the Kobo markup from it.

        :param pathtokepub: Path to the KEPUB (``None`` when cloning).
        :param log: Logger passed through to :class:`EpubContainer`.
        :param clone_data: Data from another container's ``clone_data()``,
            or ``None`` when opening a book from disk.
        :param tdir: Working directory passed through to :class:`EpubContainer`.
        '''
        super().__init__(pathtokepub, log=log, clone_data=clone_data, tdir=tdir)
        # A clone is built from a container whose contents have already had
        # the Kobo markup removed, so repeating that work for every clone
        # would only re-process every file for nothing.
        if clone_data is None:
            from calibre.ebooks.oeb.polish.kepubify import unkepubify_container

            # Call the base class commit explicitly: this only needs pending
            # changes flushed, not the book file itself rewritten.
            # NOTE(review): presumably unkepubify_container() needs the
            # on-disk files to be up to date -- confirm.
            Container.commit(self, keep_parsed=True)
            unkepubify_container(self)

    def commit_epub(self, outpath: str) -> None:
        '''
        Write the book to `outpath`, re-inserting the Kobo markup.

        When the book is an exploded directory it is written out unchanged
        by the parent class. Otherwise the markup is added to a temporary
        clone, so this container keeps its markup-free contents.
        '''
        if self.is_dir:
            return super().commit_epub(outpath)
        from calibre.ebooks.oeb.polish.kepubify import Options, kepubify_container

        with TemporaryDirectory() as tdir:
            # Clone as a plain EpubContainer so that committing the clone does
            # not come back through this method.
            container = clone_container(self, tdir, container_class=EpubContainer)
            kepubify_container(container, Options())
            container.commit(outpath)
# AZW3 {{{
class InvalidMobi(InvalidBook):
@ -1590,8 +1615,13 @@ def get_container(path, log=None, tdir=None, tweak_mode=False, ebook_cls=None) -
isdir = False
own_tdir = not tdir
if ebook_cls is None:
ebook_cls = (AZW3Container if path.rpartition('.')[-1].lower() in {'azw3', 'mobi', 'original_azw3', 'original_mobi'} and not isdir
else EpubContainer)
ext = path.rpartition('.')[-1].lower()
ebook_cls = EpubContainer
if not isdir:
if ext in {'azw3', 'mobi', 'original_azw3', 'original_mobi'}:
ebook_cls = AZW3Container
elif ext in {'kepub', 'original_kepub'}:
ebook_cls = KEPUBContainer
if own_tdir:
tdir = PersistentTemporaryDirectory(f'_{ebook_cls.book_type}_container')
try:

View File

@ -450,14 +450,16 @@ def process_path(path: str, kobo_js_href: str, metadata_lang: str, opts: Options
def do_work_in_parallel(container: Container, kobo_js_name: str, opts: Options, metadata_lang: str, max_workers: int) -> None:
names_that_need_work = tuple(name for name, mt in container.mime_map.items() if mt in OEB_DOCS or mt in OEB_STYLES)
num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers)
paths = tuple(map(container.name_to_abspath, names_that_need_work))
def name_to_abspath(name: str) -> str:
return container.get_file_path_for_processing(name, allow_modification=True)
if num_workers < 2:
for name in names_that_need_work:
process_path(container.name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name])
process_path(name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name])
else:
with ThreadPoolExecutor(max_workers=num_workers) as executor:
futures = tuple(executor.submit(
process_path, container.name_to_abspath(name), container.name_to_href(kobo_js_name, name),
process_path, name_to_abspath(name), container.name_to_href(kobo_js_name, name),
metadata_lang, opts, container.mime_map[name]) for name in names_that_need_work)
for future in futures:
future.result()

View File

@ -51,7 +51,7 @@ CUSTOMIZATION = {
'remove_ncx': True,
}
SUPPORTED = {'EPUB', 'AZW3'}
SUPPORTED = {'EPUB', 'AZW3', 'KEPUB'}
# Help {{{
HELP = {'about': _(

View File

@ -3,7 +3,7 @@
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.container import EpubContainer, get_container
from calibre.ebooks.oeb.polish.kepubify import (
CSS_COMMENT_COOKIE,
DUMMY_COVER_IMAGE_NAME,
@ -27,7 +27,7 @@ class KepubifyTests(BaseTest):
path = get_book_for_kepubify(has_cover=has_cover, epub_version=epub_version)
opts = Options()._replace(remove_widows_and_orphans=True, remove_at_page_rules=True)
outpath = kepubify_path(path, opts=opts, allow_overwrite=True)
c = get_container(outpath, tweak_mode=True)
c = get_container(outpath, tweak_mode=True, ebook_cls=EpubContainer)
spine_names = tuple(n for n, is_linear in c.spine_names)
cname = 'titlepage.xhtml' if has_cover else f'{DUMMY_TITLE_PAGE_NAME}.xhtml'
self.assertEqual(spine_names, (cname, 'index_split_000.html', 'index_split_001.html'))

View File

@ -452,7 +452,7 @@ def remove_embedded_tts(container):
def embed_tts(container, report_progress=None, callback_to_download_voices=None):
report_progress = report_progress or ReportProgress()
if container.book_type != 'epub':
if container.book_type not in ('epub', 'kepub'):
raise UnsupportedContainerType(_('Only the EPUB format has support for embedding speech overlay audio'))
if container.opf_version_parsed[0] < 3:
if report_progress(_('Updating book internals'), '', 0, 0):

View File

@ -169,7 +169,7 @@ def epub_2_to_3(container, report, previous_nav=None, remove_ncx=True):
def upgrade_book(container, report, remove_ncx=True):
if container.book_type != 'epub' or container.opf_version_parsed.major >= 3:
if container.book_type not in ('epub', 'kepub') or container.opf_version_parsed.major >= 3:
report(_('No upgrade needed'))
return False
epub_2_to_3(container, report, remove_ncx=remove_ncx)