From ae9af6282e092875566fe6aae55a4665f4ec28e3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Mar 2025 19:27:37 +0530 Subject: [PATCH] KEPUB Output: Fix incorrect encoding detection for some HTML files without an encoding declaration. Fixes #2105424 [Private bug](https://bugs.launchpad.net/calibre/+bug/2105424) --- src/calibre/ebooks/oeb/polish/kepubify.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/kepubify.py b/src/calibre/ebooks/oeb/polish/kepubify.py index 185c596908..64966d5075 100644 --- a/src/calibre/ebooks/oeb/polish/kepubify.py +++ b/src/calibre/ebooks/oeb/polish/kepubify.py @@ -29,7 +29,7 @@ from calibre.ebooks.oeb.parse_utils import barename, merge_multiple_html_heads_a from calibre.ebooks.oeb.polish.container import Container, EpubContainer, get_container from calibre.ebooks.oeb.polish.cover import find_cover_image, find_cover_image3, find_cover_page from calibre.ebooks.oeb.polish.errors import DRMError -from calibre.ebooks.oeb.polish.parsing import parse +from calibre.ebooks.oeb.polish.parsing import decode_xml, parse from calibre.ebooks.oeb.polish.tts import lang_for_elem from calibre.ebooks.oeb.polish.utils import extract, insert_self_closing from calibre.spell.break_iterator import sentence_positions @@ -337,7 +337,7 @@ def kepubify_parsed_html(root: etree.Element, kobo_js_href: str, opts: Options, def kepubify_html_data(raw: str | bytes, kobo_js_href: str = KOBO_JS_NAME, opts: Options = Options(), metadata_lang: str = 'en'): - root = parse(raw) + root = parse(raw, decoder=lambda x: decode_xml(x)[0]) kepubify_parsed_html(root, kobo_js_href, opts, metadata_lang) return root