Conversion: Add an option under Structure detection to automatically fill in blank alt attributes for images that have alt text in the image file metadata

This commit is contained in:
Kovid Goyal 2024-05-25 14:49:29 +05:30
parent 6815707587
commit 4cf79480f5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
6 changed files with 99 additions and 44 deletions

View File

@ -239,7 +239,7 @@ def add_pipeline_options(parser, plumber):
'chapter', 'chapter_mark',
'prefer_metadata_cover', 'remove_first_image',
'insert_metadata', 'page_breaks_before',
'remove_fake_margins', 'start_reading_at',
'remove_fake_margins', 'start_reading_at', 'add_alt_text_to_img',
]
)),

View File

@ -260,7 +260,7 @@ OPTIONS = {
'structure_detection': (
'chapter', 'chapter_mark', 'start_reading_at',
'remove_first_image', 'remove_fake_margins', 'insert_metadata',
'page_breaks_before'),
'page_breaks_before', 'add_alt_text_to_img',),
'toc': (
'level1_toc', 'level2_toc', 'level3_toc',

View File

@ -420,6 +420,11 @@ OptionRecommendation(name='remove_fake_margins',
'case you can disable the removal.')
),
OptionRecommendation(name='add_alt_text_to_img',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('When an <img> tag has no alt attribute, check the associated image file for metadata that specifies alternate text, and'
' use it to fill in the alt attribute. The alt attribute is used by screen readers for assisting the visually challenged.')
),
OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW,
@ -1203,6 +1208,12 @@ OptionRecommendation(name='search_replace',
from calibre.ebooks.oeb.transforms.jacket import Jacket
Jacket()(self.oeb, self.opts, self.user_metadata)
pr(0.37)
self.flush()
if self.opts.add_alt_text_to_img:
from calibre.ebooks.oeb.transforms.alt_text import AddAltText
AddAltText()(self.oeb, self.opts)
pr(0.4)
self.flush()

View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
from io import BytesIO
from PIL import Image
from calibre.ebooks.oeb.base import SVG_MIME, urlnormalize, xpath
from calibre.utils.img import read_alt_text
def process_spine_item(item, hrefs, log):
    '''
    Fill in empty or missing ``alt`` attributes on the ``<img>`` tags of one
    spine item, using alternate text read from the referenced image files.

    :param item: Spine item; its ``data`` is queried with XPath and its
        ``abshref()`` is used to resolve each ``src`` before the lookup.
    :param hrefs: Mapping queried with the resolved href; presumably maps
        manifest hrefs to manifest items -- confirm against the caller.
    :param log: Logger whose ``warn()`` is called when an image cannot be read.

    The tree in ``item.data`` is modified in place; nothing is returned.
    '''
    for img in xpath(item.data, '//h:img[@src]'):
        src = urlnormalize(img.attrib['src'])
        target = hrefs.get(item.abshref(src))
        if not target:
            # src does not resolve to a known entry, nothing to inspect
            continue
        if target.media_type == SVG_MIME:
            # SVG images are not handed to the raster image reader
            continue
        if img.attrib.get('alt'):
            # Never overwrite alt text that is already present
            continue
        raw = target.bytes_representation
        try:
            with Image.open(BytesIO(raw)) as im:
                alt = read_alt_text(im)
        except Exception as err:
            # Best effort: an unreadable image must not abort the conversion
            log.warn(f'Failed to read alt text from image {src} with error: {err}')
            continue
        if alt:
            img.set('alt', alt)
class AddAltText:
    '''
    Callable transform that runs :func:`process_spine_item` over every item
    in the spine of a book.
    '''

    def __call__(self, oeb, opts):
        '''
        Process each spine item of ``oeb`` in order.

        :param oeb: Book object providing ``logger``, ``log``,
            ``manifest.hrefs`` and an iterable ``spine``.
        :param opts: Conversion options; not used by this transform.
        '''
        oeb.logger.info('Add alt text to images...')
        manifest_hrefs = oeb.manifest.hrefs
        for spine_item in oeb.spine:
            process_spine_item(spine_item, manifest_hrefs, oeb.log)

View File

@ -14,39 +14,9 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="2" column="3">
<widget class="QCheckBox" name="opt_remove_fake_margins">
<property name="text">
<string>Remove &amp;fake margins</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="4">
<widget class="QLabel" name="label_2">
<property name="text">
<string>The header and footer removal options have been replaced by the Search &amp; replace options. Click the Search &amp; replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item row="5" column="0" rowspan="2" colspan="4">
<item row="7" column="0" rowspan="2" colspan="4">
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
</item>
<item row="3" column="0" colspan="4">
<widget class="QCheckBox" name="opt_insert_metadata">
<property name="text">
<string>Insert &amp;metadata as page at start of book</string>
</property>
</widget>
</item>
<item row="7" column="0" colspan="4">
<widget class="XPathEdit" name="opt_start_reading_at" native="true"/>
</item>
<item row="0" column="0" colspan="4">
<widget class="XPathEdit" name="opt_chapter" native="true"/>
</item>
<item row="1" column="0" colspan="2">
<widget class="QLabel" name="label">
<property name="text">
@ -57,12 +27,8 @@
</property>
</widget>
</item>
<item row="1" column="2">
<widget class="QComboBox" name="opt_chapter_mark">
<property name="minimumContentsLength">
<number>20</number>
</property>
</widget>
<item row="0" column="0" colspan="4">
<widget class="XPathEdit" name="opt_chapter" native="true"/>
</item>
<item row="1" column="3">
<spacer name="horizontalSpacer">
@ -77,14 +43,24 @@
</property>
</spacer>
</item>
<item row="2" column="0" colspan="3">
<widget class="QCheckBox" name="opt_remove_first_image">
<property name="text">
<string>Remove first &amp;image</string>
<item row="1" column="2">
<widget class="QComboBox" name="opt_chapter_mark">
<property name="minimumContentsLength">
<number>20</number>
</property>
</widget>
</item>
<item row="8" column="0">
<item row="6" column="0" colspan="4">
<widget class="QLabel" name="label_2">
<property name="text">
<string>The header and footer removal options have been replaced by the Search &amp; replace options. Click the Search &amp; replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item row="10" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -97,6 +73,37 @@
</property>
</spacer>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="opt_insert_metadata">
<property name="text">
<string>Insert &amp;metadata as page at start of book</string>
</property>
</widget>
</item>
<item row="9" column="0" colspan="4">
<widget class="XPathEdit" name="opt_start_reading_at" native="true"/>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="opt_remove_fake_margins">
<property name="text">
<string>Remove &amp;fake margins</string>
</property>
</widget>
</item>
<item row="5" column="1">
<widget class="QCheckBox" name="opt_remove_first_image">
<property name="text">
<string>Remove first &amp;image</string>
</property>
</widget>
</item>
<item row="4" column="1">
<widget class="QCheckBox" name="opt_add_alt_text_to_img">
<property name="text">
<string>Add &amp;alt text to images</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>

View File

@ -360,6 +360,7 @@ def structure_detection(container):
g.appendChild(choices('chapter_mark', _('Chap&ter mark:'), ['pagebreak', 'rule', 'both', 'none']))
g.appendChild(checkbox('remove_first_image', _('Remove first &image')))
g.appendChild(checkbox('remove_fake_margins', _('Remove &fake margins')))
g.appendChild(checkbox('add_alt_text_to_img', _('Add &alt text to images')))
g.appendChild(checkbox('insert_metadata', _('Insert metadata at start of book')))
g.appendChild(lineedit('page_breaks_before', _('Insert page breaks before'), 50))
g.appendChild(lineedit('start_reading_at', _('Start reading at'), 50))