From 558e0a7b5bf1b87841fa28bddd55111288da9bd3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 26 Sep 2024 20:16:21 +0530
Subject: [PATCH] Comic Input: Handle comics whose internal files have control codes in their filenames. Fixes #2081982 [Unicode in filename of a CBZ archive breaks due to passing it to LXML](https://bugs.launchpad.net/calibre/+bug/2081982)
---
 src/calibre/ebooks/comic/input.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py
index 76951d0883..a691a502ee 100644
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@@ -13,6 +13,7 @@ import traceback
 from calibre import extract, prints, walk
 from calibre.constants import filesystem_encoding
 from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.utils.cleantext import clean_ascii_chars
 from calibre.utils.icu import numeric_sort_key
 from calibre.utils.ipc.job import ParallelJob
 from calibre.utils.ipc.server import Server
@@ -35,7 +36,7 @@ def extract_comic(path_to_comic_file):
     extract(path_to_comic_file, tdir)
     for x in walk(tdir):
         bn = os.path.basename(x)
-        nbn = bn.replace('#', '_')
+        nbn = clean_ascii_chars(bn.replace('#', '_'))
         if nbn != bn:
             os.rename(x, os.path.join(os.path.dirname(x), nbn))
     return tdir