KEPUB Output plugin

This commit is contained in:
Kovid Goyal 2025-02-24 15:22:28 +05:30
parent 7cd8171d32
commit ab7274536d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
8 changed files with 442 additions and 36 deletions

View File

@ -20,7 +20,7 @@ It can convert every input format in the following list, to every output format.
*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CB7, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, KEPUB, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
*Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP
*Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, KEPUB, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP
.. note ::

View File

@ -597,7 +597,7 @@ from calibre.ebooks.conversion.plugins.djvu_input import DJVUInput
from calibre.ebooks.conversion.plugins.docx_input import DOCXInput
from calibre.ebooks.conversion.plugins.docx_output import DOCXOutput
from calibre.ebooks.conversion.plugins.epub_input import EPUBInput
from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput
from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput, KEPUBOutput
from calibre.ebooks.conversion.plugins.fb2_input import FB2Input
from calibre.ebooks.conversion.plugins.fb2_output import FB2Output
from calibre.ebooks.conversion.plugins.html_input import HTMLInput
@ -656,6 +656,7 @@ plugins += [
]
plugins += [
EPUBOutput,
KEPUBOutput,
DOCXOutput,
FB2Output,
LITOutput,

View File

@ -288,13 +288,13 @@ class HyphenationGroupBox(DeviceOptionsGroupBox):
mc.setValue(device.get_pref('hyphenation_min_chars'))
self.min_chars_before = mc = QSpinBox(self)
l.addRow(_('Minimum character before hyphens') + ':', mc)
l.addRow(_('Minimum characters before hyphens') + ':', mc)
mc.setSuffix(_(' characters'))
mc.setRange(2, 20)
mc.setValue(device.get_pref('hyphenation_min_chars_before'))
self.min_chars_after = mc = QSpinBox(self)
l.addRow(_('Minimum character after hyphens') + ':', mc)
l.addRow(_('Minimum characters after hyphens') + ':', mc)
mc.setSuffix(_(' characters'))
mc.setRange(2, 20)
mc.setValue(device.get_pref('hyphenation_min_chars_after'))

View File

@ -281,6 +281,12 @@ OPTIONS = {
'no_svg_cover', 'epub_inline_toc', 'epub_toc_at_end', 'toc_title',
'preserve_cover_aspect_ratio', 'epub_flatten', 'epub_version', 'epub_max_image_size',),
'kepub': (
'dont_split_on_page_breaks', 'flow_size', 'kepub_max_image_size',
'kepub_affect_hyphenation', 'kepub_disable_hyphenation', 'kepub_hyphenation_min_chars',
'kepub_hyphenation_min_chars_before', 'kepub_hyphenation_min_chars_after', 'kepub_hyphenation_limit_lines',
),
'fb2': ('sectionize', 'fb2_genre'),
'htmlz': ('htmlz_css_type', 'htmlz_class_style', 'htmlz_title_filename'),

View File

@ -42,6 +42,31 @@ block_level_tags = (
'ul',
)
# Option objects defined once at module level so that more than one output
# plugin class in this file can list the same option in its ``options`` set.
dont_split_on_page_breaks = OptionRecommendation(
    name='dont_split_on_page_breaks',
    recommended_value=False,
    level=OptionRecommendation.LOW,
    help=_(
        'Turn off splitting at page breaks. Normally, input '
        'files are automatically split at every page break into '
        'two files. This gives an output e-book that can be '
        'parsed faster and with less resources. However, '
        'splitting is slow and if your source file contains a '
        'very large number of page breaks, you should turn off '
        'splitting on page breaks.'
    ),
)

# No recommended_value/level is passed here, so the OptionRecommendation
# defaults apply.
extract_to = OptionRecommendation(
    name='extract_to',
    help=_(
        'Extract the contents of the generated book to the '
        'specified folder. The contents of the folder are first '
        'deleted, so be careful.'
    ),
)
# Help text shared by the ``*_max_image_size`` options.
#
# The translatable template is left untouched (so existing translations keep
# matching); only the substituted values are corrected. Previously {0} was
# filled with 'none' and {1} with 'profile', which made the text claim that
# 'none' means "use the screen size from the output profile" and 'profile'
# means "no maximum size" -- the opposite of what the value names say.
# NOTE(review): the code that interprets the option value is not in this
# block; confirm that 'profile' selects the profile screen size and 'none'
# disables resizing.
max_image_size_help = _(
    'The maximum image size (width x height). A value of {0} means use the screen size from the output'
    ' profile. A value of {1} means no maximum size is specified. For example, a value of {2}'
    ' will cause all images to be resized so that their width is no more than {3} pixels and'
    ' their height is no more than {4} pixels. Note that this only affects the size of the actual'
    ' image files themselves. Any given image may be rendered at a different size depending on the styling'
    ' applied to it in the document.'
).format('profile', 'none', '100x200', 100, 200)
class EPUBOutput(OutputFormatPlugin):
@ -52,22 +77,9 @@ class EPUBOutput(OutputFormatPlugin):
ui_data = {'versions': ('2', '3')}
options = {
OptionRecommendation(name='extract_to',
help=_('Extract the contents of the generated %s file to the '
'specified folder. The contents of the folder are first '
'deleted, so be careful.') % 'EPUB'),
dont_split_on_page_breaks,
OptionRecommendation(name='dont_split_on_page_breaks',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Turn off splitting at page breaks. Normally, input '
'files are automatically split at every page break into '
'two files. This gives an output e-book that can be '
'parsed faster and with less resources. However, '
'splitting is slow and if your source file contains a '
'very large number of page breaks, you should turn off '
'splitting on page breaks.'
)
),
extract_to,
OptionRecommendation(name='flow_size', recommended_value=260,
help=_('Split all HTML files larger than this size (in KB). '
@ -126,17 +138,10 @@ class EPUBOutput(OutputFormatPlugin):
),
OptionRecommendation(name='epub_max_image_size', recommended_value='none',
help=_('The maximum image size (width x height). A value of {0} means use the screen size from the output'
' profile. A value of {1} means no maximum size is specified. For example, a value of {2}'
' will cause all images to be resized so that their width is no more than {3} pixels and'
' their height is no more than {4} pixels. Note that this only affects the size of the actual'
' image files themselves. Any given image may be rendered at a different size depending on the styling'
' applied to it in the document.'
).format('none', 'profile', '100x200', 100, 200)
help=max_image_size_help
),
}
}
recommendations = {('pretty_print', True, OptionRecommendation.HIGH)}
@ -276,6 +281,11 @@ class EPUBOutput(OutputFormatPlugin):
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
if self.opts.epub_version == '3':
encryption = self.upgrade_to_epub3(tdir, opf, encryption)
else:
if cb := getattr(self, 'container_callback', None):
container, cxpath, encpath = self.create_container(tdir, opf, encryption)
cb(container)
encryption = self.end_container(cxpath, encpath)
from calibre.ebooks.epub import initialize_container
with initialize_container(output_path, os.path.basename(opf),
@ -296,34 +306,47 @@ class EPUBOutput(OutputFormatPlugin):
os.mkdir(opts.extract_to)
with ZipFile(output_path) as zf:
zf.extractall(path=opts.extract_to)
self.log.info('EPUB extracted to', opts.extract_to)
self.log.info('Book extracted to:', opts.extract_to)
def upgrade_to_epub3(self, tdir, opf, encryption=None):
self.log.info('Upgrading to EPUB 3...')
def create_container(self, tdir, opf, encryption):
    '''
    Write the META-INF files into ``tdir`` and open it as an EPUB container.

    :param tdir: Folder that holds the book files.
    :param opf: Path to the OPF file; only its base name is passed to
        ``simple_container_xml()``.
    :param encryption: Encryption data to store in
        ``META-INF/encryption.xml``, or ``None`` to write no such file.
    :return: A tuple ``(container, container_xml_path, encryption_xml_path)``.
        ``encryption_xml_path`` is the empty string when ``encryption`` is
        ``None``. The two paths are meant to be handed to
        ``end_container()`` for cleanup.
    '''
    from calibre.ebooks.epub import simple_container_xml
    from calibre.ebooks.oeb.polish.container import EpubContainer

    meta_inf = os.path.join(tdir, 'META-INF')
    try:
        os.mkdir(meta_inf)
    except FileExistsError:
        # Re-use a folder left over from an earlier step. Any other failure
        # is allowed to propagate instead of resurfacing as a less helpful
        # error from open() below.
        pass

    cxpath = os.path.join(meta_inf, 'container.xml')
    with open(cxpath, 'wb') as f:
        f.write(simple_container_xml(os.path.basename(opf)).encode('utf-8'))

    encpath = ''
    if encryption is not None:
        encpath = os.path.join(meta_inf, 'encryption.xml')
        with open(encpath, 'wb') as ef:
            ef.write(as_bytes(encryption))

    container = EpubContainer(tdir, self.log)
    return container, cxpath, encpath
def end_container(self, cxpath, encpath):
    '''
    Remove the temporary container files and return the encryption data.

    :param cxpath: Path of the temporary ``container.xml``; always removed.
    :param encpath: Path of the temporary ``encryption.xml``. A falsy value
        (such as the empty string) means no such file was written.
    :return: The bytes read from ``encpath`` just before it is removed, or
        ``None`` when ``encpath`` is falsy.
    '''
    os.remove(cxpath)
    if not encpath:
        return None
    # Close the handle explicitly before deleting: relying on garbage
    # collection leaks the descriptor on non-refcounting interpreters and an
    # open file cannot be removed on Windows.
    with open(encpath, 'rb') as f:
        encryption = f.read()
    os.remove(encpath)
    return encryption
def upgrade_to_epub3(self, tdir, opf, encryption=None):
self.log.info('Upgrading to EPUB 3...')
from calibre.ebooks.oeb.polish.cover import fix_conversion_titlepage_links_in_nav
from calibre.ebooks.oeb.polish.upgrade import epub_2_to_3
existing_nav = getattr(self.opts, 'epub3_nav_parsed', None)
nav_href = getattr(self.opts, 'epub3_nav_href', None)
previous_nav = (nav_href, existing_nav) if existing_nav is not None and nav_href else None
container, cxpath, encpath = self.create_container(tdir, opf, encryption)
epub_2_to_3(container, self.log.info, previous_nav=previous_nav)
fix_conversion_titlepage_links_in_nav(container)
container.commit()
os.remove(f.name)
if encryption is not None:
encryption = open(ef.name, 'rb').read()
os.remove(ef.name)
if cb := getattr(self, 'container_callback', None):
cb(container)
encryption = self.end_container(cxpath, encpath)
try:
os.rmdir(os.path.join(tdir, 'META-INF'))
except OSError:
@ -565,3 +588,86 @@ class EPUBOutput(OutputFormatPlugin):
simplify_toc_entry(self.oeb.toc)
# }}}
class KEPUBOutput(OutputFormatPlugin):
    '''
    Output plugin for the ``kepub`` file type.

    Conversion is delegated to the EPUB output plugin, forced to EPUB
    version 3, with a callback installed on it that adds the Kobo markup to
    the generated container.
    '''

    name = 'KEPUB Output'
    author = 'Kovid Goyal'
    file_type = 'kepub'
    commit_name = 'kepub_output'

    options = {
        dont_split_on_page_breaks,
        extract_to,

        OptionRecommendation(
            name='flow_size', recommended_value=512,
            help=_('Split all HTML files larger than this size (in KB). '
                   'This is necessary as some devices cannot handle large '
                   'file sizes. Set to 0 to disable size based splitting.'),
        ),

        OptionRecommendation(
            name='kepub_max_image_size', recommended_value='none',
            help=max_image_size_help,
        ),

        OptionRecommendation(
            name='kepub_affect_hyphenation', recommended_value=False,
            help=_('Modify how hyphenation is performed for this book. Note that hyphenation'
                   ' does not perform well for all languages, as it depends on the dictionaries'
                   ' present on the device, which are not always of the highest quality.'),
        ),

        OptionRecommendation(
            name='kepub_disable_hyphenation', recommended_value=False,
            help=_('Override all hyphenation settings in book, forcefully disabling hyphenation completely.'),
        ),

        OptionRecommendation(
            name='kepub_hyphenation_min_chars', recommended_value=6,
            help=_('Minimum word length to hyphenate, in characters.'),
        ),

        OptionRecommendation(
            name='kepub_hyphenation_min_chars_before', recommended_value=3,
            help=_('Minimum characters before hyphens.'),
        ),

        OptionRecommendation(
            name='kepub_hyphenation_min_chars_after', recommended_value=3,
            help=_('Minimum characters after hyphens.'),
        ),

        OptionRecommendation(
            name='kepub_hyphenation_limit_lines', recommended_value=2,
            help=_('Maximum consecutive hyphenated lines.'),
        ),
    }

    # A copy, so that changing this set never alters EPUBOutput's own set.
    recommendations = set(EPUBOutput.recommendations)

    def convert(self, oeb, output_path, input_plugin, opts, log):
        '''
        Produce the KEPUB by running the EPUB output plugin with a callback.

        All options declared by the EPUB plugin are reset on ``opts`` to
        their recommended values, except the three options that both plugins
        declare, which keep the values the caller supplied. ``epub_version``
        is then set to ``'3'`` and ``epub_max_image_size`` takes the value of
        ``kepub_max_image_size``.
        '''
        from calibre.customize.ui import plugin_for_output_format
        from calibre.ebooks.oeb.polish.kepubify import kepubify_container, make_options

        hyphenation_keys = (
            'affect_hyphenation',
            'disable_hyphenation',
            'hyphenation_min_chars',
            'hyphenation_min_chars_before',
            'hyphenation_min_chars_after',
            'hyphenation_limit_lines',
        )

        def kepubify(container):
            log.info('Adding Kobo markup...')
            # Each make_options() keyword is fed from the kepub_ prefixed
            # conversion option of the same name.
            kopts = make_options(**{key: getattr(opts, 'kepub_' + key) for key in hyphenation_keys})
            kepubify_container(container, kopts)
            container.commit()

        epub_output = plugin_for_output_format('epub')

        # Remember the user's values for the options both plugins declare,
        # since the reset loop below would otherwise overwrite them.
        kept = {
            option_name: getattr(opts, option_name)
            for option_name in ('dont_split_on_page_breaks', 'extract_to', 'flow_size')
        }
        for rec in epub_output.options:
            setattr(opts, rec.option.name, rec.recommended_value)
        opts.epub_version = '3'
        for option_name, value in kept.items():
            setattr(opts, option_name, value)
        opts.epub_max_image_size = opts.kepub_max_image_size

        # The callback is an attribute on the plugin object returned above,
        # so always remove it again, even when the conversion fails.
        epub_output.container_callback = kepubify
        try:
            epub_output.convert(oeb, output_path, input_plugin, opts, log)
        finally:
            del epub_output.container_callback

View File

@ -0,0 +1,26 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.conversion.config import OPTIONS
from calibre.gui2.convert import Widget
from calibre.gui2.convert.kepub_output_ui import Ui_Form
class PluginWidget(Widget, Ui_Form):
    '''
    Conversion dialog page holding the KEPUB output options.
    '''

    TITLE = _('KEPUB output')
    HELP = _('Options specific to')+' KEPUB '+_('output')
    COMMIT_NAME = 'kepub_output'
    ICON = 'mimetypes/epub.png'

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent, OPTIONS['output']['kepub'])

        # Toggle twice so the checkbox ends in its starting state.
        # NOTE(review): presumably done so that the toggled(bool) connections
        # fire and put the dependent hyphenation widgets into the matching
        # enabled/disabled state -- confirm against the .ui connections.
        hyphenation_switch = self.opt_kepub_affect_hyphenation
        hyphenation_switch.toggle()
        hyphenation_switch.toggle()

        self.db = db
        self.book_id = book_id
        self.initialize_options(get_option, get_help, db, book_id)

View File

@ -0,0 +1,249 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Form</class>
<widget class="QWidget" name="Form">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>644</width>
<height>353</height>
</rect>
</property>
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QFormLayout" name="formLayout">
<item row="0" column="0">
<widget class="QCheckBox" name="opt_dont_split_on_page_breaks">
<property name="text">
<string>Do not &amp;split on page breaks</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>Split files &amp;larger than:</string>
</property>
<property name="buddy">
<cstring>opt_flow_size</cstring>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QSpinBox" name="opt_flow_size">
<property name="specialValueText">
<string>Disabled</string>
</property>
<property name="suffix">
<string> KB</string>
</property>
<property name="minimum">
<number>0</number>
</property>
<property name="maximum">
<number>1000000</number>
</property>
<property name="singleStep">
<number>20</number>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_4">
<property name="text">
<string>Shrink &amp;images larger than:</string>
</property>
<property name="buddy">
<cstring>opt_kepub_max_image_size</cstring>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QLineEdit" name="opt_kepub_max_image_size"/>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_kepub_affect_hyphenation">
<property name="text">
<string>Enable/disable &amp;hyphenation for this book</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="opt_kepub_disable_hyphenation">
<property name="text">
<string>&amp;Prevent all hyphenation</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Hyphenation: &amp;minimum word length:</string>
</property>
<property name="buddy">
<cstring>opt_kepub_hyphenation_min_chars</cstring>
</property>
</widget>
</item>
<item row="5" column="1">
<widget class="QSpinBox" name="opt_kepub_hyphenation_min_chars">
<property name="specialValueText">
<string>Disabled</string>
</property>
<property name="suffix">
<string> characters</string>
</property>
</widget>
</item>
<item row="6" column="0">
 <!-- Label for the "minimum characters before" spin box. The buddy must name
      the spin box in this row so the label's shortcut focuses it. -->
 <widget class="QLabel" name="label_3">
  <property name="text">
   <string>Hyphenation: minimum characters &amp;before:</string>
  </property>
  <property name="buddy">
   <cstring>opt_kepub_hyphenation_min_chars_before</cstring>
  </property>
 </widget>
</item>
<item row="6" column="1">
<widget class="QSpinBox" name="opt_kepub_hyphenation_min_chars_before">
<property name="suffix">
<string> characters</string>
</property>
</widget>
</item>
<item row="7" column="0">
 <!-- Label for the "minimum characters after" spin box. The buddy must name
      the spin box in this row so the label's shortcut focuses it. -->
 <widget class="QLabel" name="label_5">
  <property name="text">
   <string>Hyphenation: minimum characters &amp;after:</string>
  </property>
  <property name="buddy">
   <cstring>opt_kepub_hyphenation_min_chars_after</cstring>
  </property>
 </widget>
</item>
<item row="7" column="1">
<widget class="QSpinBox" name="opt_kepub_hyphenation_min_chars_after">
<property name="suffix">
<string> characters</string>
</property>
</widget>
</item>
<item row="8" column="0">
 <!-- Label for the "limit lines" spin box. The buddy must name the spin box
      in this row so the label's shortcut focuses it. -->
 <widget class="QLabel" name="label_6">
  <property name="text">
   <string>Hyphenation: &amp;limit lines:</string>
  </property>
  <property name="buddy">
   <cstring>opt_kepub_hyphenation_limit_lines</cstring>
  </property>
 </widget>
</item>
<item row="8" column="1">
<widget class="QSpinBox" name="opt_kepub_hyphenation_limit_lines">
<property name="suffix">
<string> lines</string>
</property>
</widget>
</item>
<item row="9" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>262</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
<resources/>
<connections>
<connection>
<sender>opt_kepub_affect_hyphenation</sender>
<signal>toggled(bool)</signal>
<receiver>opt_kepub_disable_hyphenation</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>70</x>
<y>109</y>
</hint>
<hint type="destinationlabel">
<x>67</x>
<y>132</y>
</hint>
</hints>
</connection>
<connection>
<sender>opt_kepub_affect_hyphenation</sender>
<signal>toggled(bool)</signal>
<receiver>opt_kepub_hyphenation_min_chars</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>200</x>
<y>113</y>
</hint>
<hint type="destinationlabel">
<x>274</x>
<y>161</y>
</hint>
</hints>
</connection>
<connection>
<sender>opt_kepub_affect_hyphenation</sender>
<signal>toggled(bool)</signal>
<receiver>opt_kepub_hyphenation_min_chars_before</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>159</x>
<y>115</y>
</hint>
<hint type="destinationlabel">
<x>267</x>
<y>206</y>
</hint>
</hints>
</connection>
<connection>
<sender>opt_kepub_affect_hyphenation</sender>
<signal>toggled(bool)</signal>
<receiver>opt_kepub_hyphenation_min_chars_after</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>104</x>
<y>115</y>
</hint>
<hint type="destinationlabel">
<x>245</x>
<y>241</y>
</hint>
</hints>
</connection>
<connection>
<sender>opt_kepub_affect_hyphenation</sender>
<signal>toggled(bool)</signal>
<receiver>opt_kepub_hyphenation_limit_lines</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>29</x>
<y>109</y>
</hint>
<hint type="destinationlabel">
<x>252</x>
<y>262</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -550,6 +550,24 @@ def epub_output(container):
g.appendChild(choices('epub_version', _('EP&UB version:'), ui_data.versions))
# }}}
# KEPUB Output {{{
@ep
def kepub_output(container):
    # Build the KEPUB output options panel: one 'simple-group' div appended
    # to container, filled with one control per KEPUB conversion option.
    group = E.div(class_='simple-group')
    container.appendChild(group)

    # File splitting and image size
    group.appendChild(checkbox('dont_split_on_page_breaks', _('Do not &split on page breaks')))
    group.appendChild(int_spin('flow_size', _('Split files &larger than:'), unit='KB', max=1000000, step=20))
    group.appendChild(lineedit('kepub_max_image_size', _('Shrink &images larger than:')))

    # Hyphenation switches
    group.appendChild(checkbox('kepub_affect_hyphenation', _('Enable/disable &hyphenation for this book')))
    group.appendChild(checkbox('kepub_disable_hyphenation', _('&Prevent all hyphenation')))

    # Hyphenation limits; the three character counts share one unit label
    chars_unit = _('characters')
    group.appendChild(int_spin('kepub_hyphenation_min_chars', _('Hyphenation: &minimum word length:'), unit=chars_unit))
    group.appendChild(int_spin('kepub_hyphenation_min_chars_before', _('Hyphenation: minimum characters &before:'), unit=chars_unit))
    group.appendChild(int_spin('kepub_hyphenation_min_chars_after', _('Hyphenation: minimum characters &after:'), unit=chars_unit))
    group.appendChild(int_spin('kepub_hyphenation_limit_lines', _('Hyphenation: &limit lines:'), unit=_('lines')))
# }}}
# DOCX Output {{{
@ep
def docx_output(container):