From ab7274536d21e568ce68d0f1e56df7b961a14d43 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 24 Feb 2025 15:22:28 +0530 Subject: [PATCH] KEPUB Output plugin --- manual/faq.rst | 2 +- src/calibre/customize/builtins.py | 3 +- src/calibre/devices/kobo/kobotouch_config.py | 4 +- src/calibre/ebooks/conversion/config.py | 6 + .../ebooks/conversion/plugins/epub_output.py | 170 +++++++++--- src/calibre/gui2/convert/kepub_output.py | 26 ++ src/calibre/gui2/convert/kepub_output.ui | 249 ++++++++++++++++++ src/pyj/book_list/conversion_widgets.pyj | 18 ++ 8 files changed, 442 insertions(+), 36 deletions(-) create mode 100644 src/calibre/gui2/convert/kepub_output.py create mode 100644 src/calibre/gui2/convert/kepub_output.ui diff --git a/manual/faq.rst b/manual/faq.rst index 72e1f89532..e508b30ff0 100644 --- a/manual/faq.rst +++ b/manual/faq.rst @@ -20,7 +20,7 @@ It can convert every input format in the following list, to every output format. *Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CB7, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, KEPUB, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ -*Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP +*Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, KEPUB, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP .. 
note :: diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index e43b676d96..4e2ef8cdaa 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -597,7 +597,7 @@ from calibre.ebooks.conversion.plugins.djvu_input import DJVUInput from calibre.ebooks.conversion.plugins.docx_input import DOCXInput from calibre.ebooks.conversion.plugins.docx_output import DOCXOutput from calibre.ebooks.conversion.plugins.epub_input import EPUBInput -from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput +from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput, KEPUBOutput from calibre.ebooks.conversion.plugins.fb2_input import FB2Input from calibre.ebooks.conversion.plugins.fb2_output import FB2Output from calibre.ebooks.conversion.plugins.html_input import HTMLInput @@ -656,6 +656,7 @@ plugins += [ ] plugins += [ EPUBOutput, + KEPUBOutput, DOCXOutput, FB2Output, LITOutput, diff --git a/src/calibre/devices/kobo/kobotouch_config.py b/src/calibre/devices/kobo/kobotouch_config.py index 7a142455cc..91e3a0ae86 100644 --- a/src/calibre/devices/kobo/kobotouch_config.py +++ b/src/calibre/devices/kobo/kobotouch_config.py @@ -288,13 +288,13 @@ class HyphenationGroupBox(DeviceOptionsGroupBox): mc.setValue(device.get_pref('hyphenation_min_chars')) self.min_chars_before = mc = QSpinBox(self) - l.addRow(_('Minimum character before hyphens') + ':', mc) + l.addRow(_('Minimum characters before hyphens') + ':', mc) mc.setSuffix(_(' characters')) mc.setRange(2, 20) mc.setValue(device.get_pref('hyphenation_min_chars_before')) self.min_chars_after = mc = QSpinBox(self) - l.addRow(_('Minimum character after hyphens') + ':', mc) + l.addRow(_('Minimum characters after hyphens') + ':', mc) mc.setSuffix(_(' characters')) mc.setRange(2, 20) mc.setValue(device.get_pref('hyphenation_min_chars_after')) diff --git a/src/calibre/ebooks/conversion/config.py b/src/calibre/ebooks/conversion/config.py index 9d39528a04..8fa27d3ebb 
100644 --- a/src/calibre/ebooks/conversion/config.py +++ b/src/calibre/ebooks/conversion/config.py @@ -281,6 +281,12 @@ OPTIONS = { 'no_svg_cover', 'epub_inline_toc', 'epub_toc_at_end', 'toc_title', 'preserve_cover_aspect_ratio', 'epub_flatten', 'epub_version', 'epub_max_image_size',), + 'kepub': ( + 'dont_split_on_page_breaks', 'flow_size', 'kepub_max_image_size', + 'kepub_affect_hyphenation', 'kepub_disable_hyphenation', 'kepub_hyphenation_min_chars', + 'kepub_hyphenation_min_chars_before', 'kepub_hyphenation_min_chars_after', 'kepub_hyphenation_limit_lines', + ), + 'fb2': ('sectionize', 'fb2_genre'), 'htmlz': ('htmlz_css_type', 'htmlz_class_style', 'htmlz_title_filename'), diff --git a/src/calibre/ebooks/conversion/plugins/epub_output.py b/src/calibre/ebooks/conversion/plugins/epub_output.py index eef51567c6..a06ed43812 100644 --- a/src/calibre/ebooks/conversion/plugins/epub_output.py +++ b/src/calibre/ebooks/conversion/plugins/epub_output.py @@ -42,6 +42,31 @@ block_level_tags = ( 'ul', ) +dont_split_on_page_breaks = OptionRecommendation(name='dont_split_on_page_breaks', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Turn off splitting at page breaks. Normally, input ' + 'files are automatically split at every page break into ' + 'two files. This gives an output e-book that can be ' + 'parsed faster and with less resources. However, ' + 'splitting is slow and if your source file contains a ' + 'very large number of page breaks, you should turn off ' + 'splitting on page breaks.' + ) +) +extract_to = OptionRecommendation(name='extract_to', + help=_('Extract the contents of the generated book to the ' + 'specified folder. The contents of the folder are first ' + 'deleted, so be careful.')) + +max_image_size_help = _( + 'The maximum image size (width x height). A value of {0} means use the screen size from the output' + ' profile. A value of {1} means no maximum size is specified. 
For example, a value of {2}' + ' will cause all images to be resized so that their width is no more than {3} pixels and' + ' their height is no more than {4} pixels. Note that this only affects the size of the actual' + ' image files themselves. Any given image may be rendered at a different size depending on the styling' + ' applied to it in the document.' +).format('profile', 'none', '100x200', 100, 200) + class EPUBOutput(OutputFormatPlugin): @@ -52,22 +77,9 @@ class EPUBOutput(OutputFormatPlugin): ui_data = {'versions': ('2', '3')} options = { - OptionRecommendation(name='extract_to', - help=_('Extract the contents of the generated %s file to the ' - 'specified folder. The contents of the folder are first ' - 'deleted, so be careful.') % 'EPUB'), + dont_split_on_page_breaks, - OptionRecommendation(name='dont_split_on_page_breaks', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Turn off splitting at page breaks. Normally, input ' - 'files are automatically split at every page break into ' - 'two files. This gives an output e-book that can be ' - 'parsed faster and with less resources. However, ' - 'splitting is slow and if your source file contains a ' - 'very large number of page breaks, you should turn off ' - 'splitting on page breaks.' - ) - ), + extract_to, OptionRecommendation(name='flow_size', recommended_value=260, help=_('Split all HTML files larger than this size (in KB). ' @@ -126,17 +138,10 @@ class EPUBOutput(OutputFormatPlugin): ), OptionRecommendation(name='epub_max_image_size', recommended_value='none', - help=_('The maximum image size (width x height). A value of {0} means use the screen size from the output' - ' profile. A value of {1} means no maximum size is specified. For example, a value of {2}' - ' will cause all images to be resized so that their width is no more than {3} pixels and' - ' their height is no more than {4} pixels. Note that this only affects the size of the actual' - ' image files themselves. 
Any given image may be rendered at a different size depending on the styling' - ' applied to it in the document.' - ).format('none', 'profile', '100x200', 100, 200) + help=max_image_size_help ), - - } + } recommendations = {('pretty_print', True, OptionRecommendation.HIGH)} @@ -276,6 +281,11 @@ class EPUBOutput(OutputFormatPlugin): encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid) if self.opts.epub_version == '3': encryption = self.upgrade_to_epub3(tdir, opf, encryption) + else: + if cb := getattr(self, 'container_callback', None): + container, cxpath, encpath = self.create_container(tdir, opf, encryption) + cb(container) + encryption = self.end_container(cxpath, encpath) from calibre.ebooks.epub import initialize_container with initialize_container(output_path, os.path.basename(opf), @@ -296,34 +306,47 @@ class EPUBOutput(OutputFormatPlugin): os.mkdir(opts.extract_to) with ZipFile(output_path) as zf: zf.extractall(path=opts.extract_to) - self.log.info('EPUB extracted to', opts.extract_to) + self.log.info('Book extracted to:', opts.extract_to) - def upgrade_to_epub3(self, tdir, opf, encryption=None): - self.log.info('Upgrading to EPUB 3...') + def create_container(self, tdir, opf, encryption): from calibre.ebooks.epub import simple_container_xml - from calibre.ebooks.oeb.polish.cover import fix_conversion_titlepage_links_in_nav try: os.mkdir(os.path.join(tdir, 'META-INF')) except OSError: pass with open(os.path.join(tdir, 'META-INF', 'container.xml'), 'wb') as f: f.write(simple_container_xml(os.path.basename(opf)).encode('utf-8')) + enc_file_name = '' if encryption is not None: with open(os.path.join(tdir, 'META-INF', 'encryption.xml'), 'wb') as ef: ef.write(as_bytes(encryption)) + enc_file_name = ef.name from calibre.ebooks.oeb.polish.container import EpubContainer container = EpubContainer(tdir, self.log) + return container, f.name, enc_file_name + + def end_container(self, cxpath, encpath): + os.remove(cxpath) + encryption = None + if encpath: + 
encryption = open(encpath, 'rb').read() + os.remove(encpath) + return encryption + + def upgrade_to_epub3(self, tdir, opf, encryption=None): + self.log.info('Upgrading to EPUB 3...') + from calibre.ebooks.oeb.polish.cover import fix_conversion_titlepage_links_in_nav from calibre.ebooks.oeb.polish.upgrade import epub_2_to_3 existing_nav = getattr(self.opts, 'epub3_nav_parsed', None) nav_href = getattr(self.opts, 'epub3_nav_href', None) previous_nav = (nav_href, existing_nav) if existing_nav is not None and nav_href else None + container, cxpath, encpath = self.create_container(tdir, opf, encryption) epub_2_to_3(container, self.log.info, previous_nav=previous_nav) fix_conversion_titlepage_links_in_nav(container) container.commit() - os.remove(f.name) - if encryption is not None: - encryption = open(ef.name, 'rb').read() - os.remove(ef.name) + if cb := getattr(self, 'container_callback', None): + cb(container) + encryption = self.end_container(cxpath, encpath) try: os.rmdir(os.path.join(tdir, 'META-INF')) except OSError: @@ -565,3 +588,86 @@ class EPUBOutput(OutputFormatPlugin): simplify_toc_entry(self.oeb.toc) # }}} + + +class KEPUBOutput(OutputFormatPlugin): + + name = 'KEPUB Output' + author = 'Kovid Goyal' + file_type = 'kepub' + commit_name = 'kepub_output' + + options = { + dont_split_on_page_breaks, + extract_to, + + OptionRecommendation(name='flow_size', recommended_value=512, + help=_('Split all HTML files larger than this size (in KB). ' + 'This is necessary as some devices cannot handle large ' + 'file sizes. Set to 0 to disable size based splitting.') + ), + + OptionRecommendation(name='kepub_max_image_size', recommended_value='none', + help=max_image_size_help + ), + + OptionRecommendation(name='kepub_affect_hyphenation', recommended_value=False, + help=_('Modify how hyphenation is performed for this book. 
Note that hyphenation' + ' does not perform well for all languages, as it depends on the dictionaries' + ' present on the device, which are not always of the highest quality.') + ), + + OptionRecommendation(name='kepub_disable_hyphenation', recommended_value=False, + help=_('Override all hyphenation settings in book, forcefully disabling hyphenation completely.') + ), + + OptionRecommendation(name='kepub_hyphenation_min_chars', recommended_value=6, + help=_('Minimum word length to hyphenate, in characters.') + ), + + OptionRecommendation(name='kepub_hyphenation_min_chars_before', recommended_value=3, + help=_('Minimum characters before hyphens.') + ), + + OptionRecommendation(name='kepub_hyphenation_min_chars_after', recommended_value=3, + help=_('Minimum characters after hyphens.') + ), + + OptionRecommendation(name='kepub_hyphenation_limit_lines', recommended_value=2, + help=_('Maximum consecutive hyphenated lines.') + ), + } + + recommendations = set(EPUBOutput.recommendations) + + def convert(self, oeb, output_path, input_plugin, opts, log): + from calibre.customize.ui import plugin_for_output_format + from calibre.ebooks.oeb.polish.kepubify import kepubify_container, make_options + + def kepubify(container): + log.info('Adding Kobo markup...') + kopts = make_options( + affect_hyphenation=opts.kepub_affect_hyphenation, + disable_hyphenation=opts.kepub_disable_hyphenation, + hyphenation_min_chars=opts.kepub_hyphenation_min_chars, + hyphenation_min_chars_before=opts.kepub_hyphenation_min_chars_before, + hyphenation_min_chars_after=opts.kepub_hyphenation_min_chars_after, + hyphenation_limit_lines=opts.kepub_hyphenation_limit_lines, + ) + kepubify_container(container, kopts) + container.commit() + + epub_output = plugin_for_output_format('epub') + dp, et, fs = opts.dont_split_on_page_breaks, opts.extract_to, opts.flow_size + for opt in epub_output.options: + setattr(opts, opt.option.name, opt.recommended_value) + opts.epub_version = '3' + 
opts.dont_split_on_page_breaks = dp + opts.extract_to = et + opts.flow_size = fs + opts.epub_max_image_size = opts.kepub_max_image_size + epub_output.container_callback = kepubify + try: + epub_output.convert(oeb, output_path, input_plugin, opts, log) + finally: + del epub_output.container_callback diff --git a/src/calibre/gui2/convert/kepub_output.py b/src/calibre/gui2/convert/kepub_output.py new file mode 100644 index 0000000000..d1f4b0c730 --- /dev/null +++ b/src/calibre/gui2/convert/kepub_output.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + +from calibre.ebooks.conversion.config import OPTIONS +from calibre.gui2.convert import Widget +from calibre.gui2.convert.kepub_output_ui import Ui_Form + + +class PluginWidget(Widget, Ui_Form): + + TITLE = _('KEPUB output') + HELP = _('Options specific to')+' KEPUB '+_('output') + COMMIT_NAME = 'kepub_output' + ICON = 'mimetypes/epub.png' + + def __init__(self, parent, get_option, get_help, db=None, book_id=None): + Widget.__init__(self, parent, OPTIONS['output']['kepub']) + for i in range(2): + self.opt_kepub_affect_hyphenation.toggle() + self.db, self.book_id = db, book_id + self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/convert/kepub_output.ui b/src/calibre/gui2/convert/kepub_output.ui new file mode 100644 index 0000000000..d415c36c3b --- /dev/null +++ b/src/calibre/gui2/convert/kepub_output.ui @@ -0,0 +1,249 @@ + + + Form + + + + 0 + 0 + 644 + 353 + + + + Form + + + + + + Do not &split on page breaks + + + + + + + Split files &larger than: + + + opt_flow_size + + + + + + + Disabled + + + KB + + + 0 + + + 1000000 + + + 20 + + + + + + + Shrink &images larger than: + + + opt_kepub_max_image_size + + + + + + + + + + Enable/disable &hyphenation for this book + + + + + + + &Prevent all hyphenation + + + + + + + Hyphenation: &minimum word length: + + + 
opt_kepub_hyphenation_min_chars + + + + + + + Disabled + + + characters + + + + + + + Hyphenation: minimum characters &before: + + + opt_kepub_hyphenation_min_chars_before + + + + + + + characters + + + + + + + Hyphenation: minimum characters &after: + + + opt_kepub_hyphenation_min_chars_after + + + + + + + characters + + + + + + + Hyphenation: &limit lines: + + + opt_kepub_hyphenation_limit_lines + + + + + + + lines + + + + + + + Qt::Vertical + + + + 20 + 262 + + + + + + + + + + opt_kepub_affect_hyphenation + toggled(bool) + opt_kepub_disable_hyphenation + setEnabled(bool) + + + 70 + 109 + + + 67 + 132 + + + + + opt_kepub_affect_hyphenation + toggled(bool) + opt_kepub_hyphenation_min_chars + setEnabled(bool) + + + 200 + 113 + + + 274 + 161 + + + + + opt_kepub_affect_hyphenation + toggled(bool) + opt_kepub_hyphenation_min_chars_before + setEnabled(bool) + + + 159 + 115 + + + 267 + 206 + + + + + opt_kepub_affect_hyphenation + toggled(bool) + opt_kepub_hyphenation_min_chars_after + setEnabled(bool) + + + 104 + 115 + + + 245 + 241 + + + + + opt_kepub_affect_hyphenation + toggled(bool) + opt_kepub_hyphenation_limit_lines + setEnabled(bool) + + + 29 + 109 + + + 252 + 262 + + + + + diff --git a/src/pyj/book_list/conversion_widgets.pyj b/src/pyj/book_list/conversion_widgets.pyj index 35fe521616..1577846d28 100644 --- a/src/pyj/book_list/conversion_widgets.pyj +++ b/src/pyj/book_list/conversion_widgets.pyj @@ -550,6 +550,24 @@ def epub_output(container): g.appendChild(choices('epub_version', _('EP&UB version:'), ui_data.versions)) # }}} +# KEPUB Output {{{ +@ep +def kepub_output(container): + g = E.div(class_='simple-group') + container.appendChild(g) + g.appendChild(checkbox('dont_split_on_page_breaks', _('Do not &split on page breaks'))) + g.appendChild(int_spin('flow_size', _('Split files &larger than:'), unit='KB', max=1000000, step=20)) + g.appendChild(lineedit('kepub_max_image_size', _('Shrink &images larger than:'))) + g.appendChild(checkbox('kepub_affect_hyphenation', 
_('Enable/disable &hyphenation for this book'))) + g.appendChild(checkbox('kepub_disable_hyphenation', _('&Prevent all hyphenation'))) + c = _('characters') + g.appendChild(int_spin('kepub_hyphenation_min_chars', _('Hyphenation: &minimum word length:'), unit=c)) + g.appendChild(int_spin('kepub_hyphenation_min_chars_before', _('Hyphenation: minimum characters &before:'), unit=c)) + g.appendChild(int_spin('kepub_hyphenation_min_chars_after', _('Hyphenation: minimum characters &after:'), unit=c)) + g.appendChild(int_spin('kepub_hyphenation_limit_lines', _('Hyphenation: &limit lines:'), unit=_('lines'))) +# }}} + + # DOCX Output {{{ @ep def docx_output(container):