Edit book: Allow editing of KEPUB files

Note that all Kobo markup is removed when opening the file and
automatically re-inserted when saving it.
This commit is contained in:
Kovid Goyal 2025-02-24 13:06:57 +05:30
parent 8cae0b402b
commit 7cd8171d32
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
9 changed files with 64 additions and 32 deletions

View File

@ -4,10 +4,10 @@ Editing e-books
======================== ========================
calibre has an integrated e-book editor that can be used to edit books in the calibre has an integrated e-book editor that can be used to edit books in the
EPUB and AZW3 (Kindle) formats. The editor shows you the HTML and CSS that is EPUB, KEPUB (Kobo) and AZW3 (Kindle) formats. The editor shows you the HTML and
used internally inside the book files, with a live preview that updates as you CSS that is used internally inside the book files, with a live preview that
make changes. It also contains various automated tools to perform common updates as you make changes. It also contains various automated tools to
cleanup and fixing tasks. perform common cleanup and fixing tasks.
You can use this editor by right clicking on any book in calibre and selecting You can use this editor by right clicking on any book in calibre and selecting
:guilabel:`Edit book`. :guilabel:`Edit book`.

View File

@ -5,6 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import namedtuple from collections import namedtuple
from functools import partial
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.check.base import WARN, run_checkers from calibre.ebooks.oeb.polish.check.base import WARN, run_checkers
@ -65,8 +66,8 @@ def run_checks(container):
items = raster_images items = raster_images
if items is not None: if items is not None:
items.append((name, mt, container.raw_data(name, decode=decode))) items.append((name, mt, container.raw_data(name, decode=decode)))
if container.book_type == 'epub': if container.MAX_HTML_FILE_SIZE:
errors.extend(run_checkers(check_html_size, html_items)) errors.extend(run_checkers(partial(check_html_size, max_size=container.MAX_HTML_FILE_SIZE), html_items))
errors.extend(run_checkers(check_xml_parsing, xml_items)) errors.extend(run_checkers(check_xml_parsing, xml_items))
errors.extend(run_checkers(check_xml_parsing, html_items)) errors.extend(run_checkers(check_xml_parsing, html_items))
errors.extend(run_checkers(check_raster_images, raster_images)) errors.extend(run_checkers(check_raster_images, raster_images))

View File

@ -154,12 +154,11 @@ class EscapedName(BaseError):
class TooLarge(BaseError): class TooLarge(BaseError):
level = INFO level = INFO
MAX_SIZE = 260 *1024
HELP = _('This HTML file is larger than %s. Too large HTML files can cause performance problems'
' on some e-book readers. Consider splitting this file into smaller sections.') % human_readable(MAX_SIZE)
def __init__(self, name): def __init__(self, name, max_size):
BaseError.__init__(self, _('File too large'), name) BaseError.__init__(self, _('File too large'), name)
self.HELP = _('This HTML file is larger than {}. Too large HTML files can cause performance problems'
' on some e-book readers. Consider splitting this file into smaller sections.').format(human_readable(max_size))
class BadEntity(BaseError): class BadEntity(BaseError):
@ -244,10 +243,10 @@ class EntitityProcessor:
return b' ' * len(m.group()) return b' ' * len(m.group())
def check_html_size(name, mt, raw): def check_html_size(name, mt, raw, max_size=0):
errors = [] errors = []
if len(raw) > TooLarge.MAX_SIZE: if max_size and len(raw) > max_size:
errors.append(TooLarge(name)) errors.append(TooLarge(name, max_size))
return errors return errors

View File

@ -49,7 +49,7 @@ from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook
from calibre.ebooks.oeb.polish.parsing import decode_xml from calibre.ebooks.oeb.polish.parsing import decode_xml
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import OEB_FONTS, CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, insert_self_closing, parse_css from calibre.ebooks.oeb.polish.utils import OEB_FONTS, CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, insert_self_closing, parse_css
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile, TemporaryDirectory
from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail
from calibre.utils.ipc.simple_worker import WorkerError, fork_job from calibre.utils.ipc.simple_worker import WorkerError, fork_job
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
@ -85,14 +85,14 @@ def clone_dir(src, dest):
shutil.copy2(spath, dpath) shutil.copy2(spath, dpath)
def clone_container(container, dest_dir): def clone_container(container, dest_dir, container_class=None):
' Efficiently clone a container using hard links ' ' Efficiently clone a container using hard links '
dest_dir = os.path.abspath(os.path.realpath(dest_dir)) dest_dir = os.path.abspath(os.path.realpath(dest_dir))
clone_data = container.clone_data(dest_dir) clone_data = container.clone_data(dest_dir)
cls = type(container) container_class = container_class or type(container)
if cls is Container: if container_class is Container:
return cls(None, None, container.log, clone_data=clone_data) return container_class(None, None, container.log, clone_data=clone_data)
return cls(None, container.log, clone_data=clone_data) return container_class(None, container.log, clone_data=clone_data)
def name_to_abspath(name, root): def name_to_abspath(name, root):
@ -229,6 +229,7 @@ class Container(ContainerBase): # {{{
SUPPORTS_TITLEPAGES = True SUPPORTS_TITLEPAGES = True
SUPPORTS_FILENAMES = True SUPPORTS_FILENAMES = True
MAX_HTML_FILE_SIZE = 0
@property @property
def book_type_for_display(self): def book_type_for_display(self):
@ -1121,6 +1122,7 @@ def walk_dir(basedir):
class EpubContainer(Container): class EpubContainer(Container):
book_type = 'epub' book_type = 'epub'
MAX_HTML_FILE_SIZE = 260 * 1024
@property @property
def book_type_for_display(self): def book_type_for_display(self):
@ -1377,6 +1379,12 @@ class EpubContainer(Container):
f.write(decrypt_font_data(key, data, alg)) f.write(decrypt_font_data(key, data, alg))
if outpath is None: if outpath is None:
outpath = self.pathtoepub outpath = self.pathtoepub
self.commit_epub(outpath)
for name, data in iteritems(restore_fonts):
with self.open(name, 'wb') as f:
f.write(data)
def commit_epub(self, outpath: str) -> None:
if self.is_dir: if self.is_dir:
# First remove items from the source dir that do not exist any more # First remove items from the source dir that do not exist any more
for is_root, dirpath, fname in walk_dir(self.pathtoepub): for is_root, dirpath, fname in walk_dir(self.pathtoepub):
@ -1413,9 +1421,6 @@ class EpubContainer(Container):
et = et.encode('ascii') et = et.encode('ascii')
f.write(et) f.write(et)
zip_rebuilder(self.root, outpath) zip_rebuilder(self.root, outpath)
for name, data in iteritems(restore_fonts):
with self.open(name, 'wb') as f:
f.write(data)
@property @property
def path_to_ebook(self): def path_to_ebook(self):
@ -1428,6 +1433,26 @@ class EpubContainer(Container):
# }}} # }}}
class KEPUBContainer(EpubContainer):
    '''
    Container for editing KEPUB (Kobo) books.

    The Kobo markup is stripped from the book when the container is created
    and put back when the book is written out as a file, so the rest of the
    editing code works on the book without Kobo markup.
    '''

    book_type = 'kepub'
    # Larger HTML size limit than the one set on EpubContainer (260KB)
    MAX_HTML_FILE_SIZE = 512 * 1024

    def __init__(self, pathtokepub, log, clone_data=None, tdir=None):
        '''
        Open the book exactly as EpubContainer does, then remove the Kobo
        markup from it.

        :param pathtokepub: Passed unchanged to EpubContainer as the path
        :param log: Passed unchanged to EpubContainer
        :param clone_data: Passed unchanged to EpubContainer
        :param tdir: Passed unchanged to EpubContainer
        '''
        super().__init__(pathtokepub, log=log, clone_data=clone_data, tdir=tdir)
        # Imported here rather than at module level, presumably to avoid an
        # import cycle with the kepubify module -- TODO confirm
        from calibre.ebooks.oeb.polish.kepubify import unkepubify_container
        # Call the base class commit() directly instead of self.commit(), so
        # that the EPUB specific commit (which ends up in commit_epub()) is
        # not triggered while the book is merely being opened.
        Container.commit(self, keep_parsed=True)
        # NOTE(review): this also runs when clone_data is supplied, i.e. for
        # every clone made via clone_container() -- confirm that stripping an
        # already stripped clone again is intended.
        unkepubify_container(self)

    def commit_epub(self, outpath: str) -> None:
        '''
        Write the book to outpath, re-inserting the Kobo markup first.

        The markup is added to a temporary clone of this container, so the
        files of this container itself are left without Kobo markup.

        :param outpath: Destination that the kepubified clone is committed to
        '''
        if not self.is_dir:
            from calibre.ebooks.oeb.polish.kepubify import Options, kepubify_container
            with TemporaryDirectory() as scratch_dir:
                # Clone as a plain EpubContainer so that committing the clone
                # does not come back into this method.
                epub_clone = clone_container(self, scratch_dir, container_class=EpubContainer)
                kepubify_container(epub_clone, Options())
                epub_clone.commit(outpath)
            return None
        # When is_dir is set the book is written out by the EPUB code as is,
        # no Kobo markup is inserted.
        return super().commit_epub(outpath)
# AZW3 {{{ # AZW3 {{{
class InvalidMobi(InvalidBook): class InvalidMobi(InvalidBook):
@ -1590,8 +1615,13 @@ def get_container(path, log=None, tdir=None, tweak_mode=False, ebook_cls=None) -
isdir = False isdir = False
own_tdir = not tdir own_tdir = not tdir
if ebook_cls is None: if ebook_cls is None:
ebook_cls = (AZW3Container if path.rpartition('.')[-1].lower() in {'azw3', 'mobi', 'original_azw3', 'original_mobi'} and not isdir ext = path.rpartition('.')[-1].lower()
else EpubContainer) ebook_cls = EpubContainer
if not isdir:
if ext in {'azw3', 'mobi', 'original_azw3', 'original_mobi'}:
ebook_cls = AZW3Container
elif ext in {'kepub', 'original_kepub'}:
ebook_cls = KEPUBContainer
if own_tdir: if own_tdir:
tdir = PersistentTemporaryDirectory(f'_{ebook_cls.book_type}_container') tdir = PersistentTemporaryDirectory(f'_{ebook_cls.book_type}_container')
try: try:

View File

@ -450,14 +450,16 @@ def process_path(path: str, kobo_js_href: str, metadata_lang: str, opts: Options
def do_work_in_parallel(container: Container, kobo_js_name: str, opts: Options, metadata_lang: str, max_workers: int) -> None: def do_work_in_parallel(container: Container, kobo_js_name: str, opts: Options, metadata_lang: str, max_workers: int) -> None:
names_that_need_work = tuple(name for name, mt in container.mime_map.items() if mt in OEB_DOCS or mt in OEB_STYLES) names_that_need_work = tuple(name for name, mt in container.mime_map.items() if mt in OEB_DOCS or mt in OEB_STYLES)
num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers) num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers)
paths = tuple(map(container.name_to_abspath, names_that_need_work)) def name_to_abspath(name: str) -> str:
return container.get_file_path_for_processing(name, allow_modification=True)
if num_workers < 2: if num_workers < 2:
for name in names_that_need_work: for name in names_that_need_work:
process_path(container.name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name]) process_path(name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name])
else: else:
with ThreadPoolExecutor(max_workers=num_workers) as executor: with ThreadPoolExecutor(max_workers=num_workers) as executor:
futures = tuple(executor.submit( futures = tuple(executor.submit(
process_path, container.name_to_abspath(name), container.name_to_href(kobo_js_name, name), process_path, name_to_abspath(name), container.name_to_href(kobo_js_name, name),
metadata_lang, opts, container.mime_map[name]) for name in names_that_need_work) metadata_lang, opts, container.mime_map[name]) for name in names_that_need_work)
for future in futures: for future in futures:
future.result() future.result()

View File

@ -51,7 +51,7 @@ CUSTOMIZATION = {
'remove_ncx': True, 'remove_ncx': True,
} }
SUPPORTED = {'EPUB', 'AZW3'} SUPPORTED = {'EPUB', 'AZW3', 'KEPUB'}
# Help {{{ # Help {{{
HELP = {'about': _( HELP = {'about': _(

View File

@ -3,7 +3,7 @@
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.container import EpubContainer, get_container
from calibre.ebooks.oeb.polish.kepubify import ( from calibre.ebooks.oeb.polish.kepubify import (
CSS_COMMENT_COOKIE, CSS_COMMENT_COOKIE,
DUMMY_COVER_IMAGE_NAME, DUMMY_COVER_IMAGE_NAME,
@ -27,7 +27,7 @@ class KepubifyTests(BaseTest):
path = get_book_for_kepubify(has_cover=has_cover, epub_version=epub_version) path = get_book_for_kepubify(has_cover=has_cover, epub_version=epub_version)
opts = Options()._replace(remove_widows_and_orphans=True, remove_at_page_rules=True) opts = Options()._replace(remove_widows_and_orphans=True, remove_at_page_rules=True)
outpath = kepubify_path(path, opts=opts, allow_overwrite=True) outpath = kepubify_path(path, opts=opts, allow_overwrite=True)
c = get_container(outpath, tweak_mode=True) c = get_container(outpath, tweak_mode=True, ebook_cls=EpubContainer)
spine_names = tuple(n for n, is_linear in c.spine_names) spine_names = tuple(n for n, is_linear in c.spine_names)
cname = 'titlepage.xhtml' if has_cover else f'{DUMMY_TITLE_PAGE_NAME}.xhtml' cname = 'titlepage.xhtml' if has_cover else f'{DUMMY_TITLE_PAGE_NAME}.xhtml'
self.assertEqual(spine_names, (cname, 'index_split_000.html', 'index_split_001.html')) self.assertEqual(spine_names, (cname, 'index_split_000.html', 'index_split_001.html'))

View File

@ -452,7 +452,7 @@ def remove_embedded_tts(container):
def embed_tts(container, report_progress=None, callback_to_download_voices=None): def embed_tts(container, report_progress=None, callback_to_download_voices=None):
report_progress = report_progress or ReportProgress() report_progress = report_progress or ReportProgress()
if container.book_type != 'epub': if container.book_type not in ('epub', 'kepub'):
raise UnsupportedContainerType(_('Only the EPUB format has support for embedding speech overlay audio')) raise UnsupportedContainerType(_('Only the EPUB format has support for embedding speech overlay audio'))
if container.opf_version_parsed[0] < 3: if container.opf_version_parsed[0] < 3:
if report_progress(_('Updating book internals'), '', 0, 0): if report_progress(_('Updating book internals'), '', 0, 0):

View File

@ -169,7 +169,7 @@ def epub_2_to_3(container, report, previous_nav=None, remove_ncx=True):
def upgrade_book(container, report, remove_ncx=True): def upgrade_book(container, report, remove_ncx=True):
if container.book_type != 'epub' or container.opf_version_parsed.major >= 3: if container.book_type not in ('epub', 'kepub') or container.opf_version_parsed.major >= 3:
report(_('No upgrade needed')) report(_('No upgrade needed'))
return False return False
epub_2_to_3(container, report, remove_ncx=remove_ncx) epub_2_to_3(container, report, remove_ncx=remove_ncx)