From 9de4b766994f0837ff1159762dd46c8e6b13480e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 10 Nov 2021 09:24:58 +0530 Subject: [PATCH] DOCX Input: Sanitize image filenames more strictly to work around broken EPUB software. Fixes #1950412 [epub image file names contains special charactors](https://bugs.launchpad.net/calibre/+bug/1950412) --- src/calibre/ebooks/docx/images.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py index 873e8781eb..b8605184f8 100644 --- a/src/calibre/ebooks/docx/images.py +++ b/src/calibre/ebooks/docx/images.py @@ -6,13 +6,14 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' import os +import re +from lxml.html.builder import HR, IMG -from lxml.html.builder import IMG, HR - +from calibre import sanitize_file_name from calibre.constants import iswindows from calibre.ebooks.docx.names import barename from calibre.utils.filenames import ascii_filename -from calibre.utils.img import resize_to_fit, image_to_data +from calibre.utils.img import image_to_data, resize_to_fit from calibre.utils.imghdr import what from polyglot.builtins import iteritems, itervalues @@ -25,7 +26,7 @@ class LinkedImageNotFound(ValueError): def image_filename(x): - return ascii_filename(x).replace(' ', '_').replace('#', '_') + return sanitize_file_name(re.sub(r'[^0-9a-zA-Z.-]', '_', ascii_filename(x)).lstrip('_').lstrip('.')) def emu_to_pt(x):