Edit Book: When downloading external resources, ensure the generated filenames are valid. Fixes #1639448 [download external resorces for dificult url make broken links to files](https://bugs.launchpad.net/calibre/+bug/1639448)

2025-08-30 23:00:21 -04:00 · 2016-11-06 10:55:45 +05:30 · 2016-11-06 10:55:45 +05:30 · 082b1593d6
commit 082b1593d6
parent f519b40778
3 changed files with 20 additions and 8 deletions
--- a/src/calibre/ebooks/oeb/polish/check/parsing.py
+++ b/src/calibre/ebooks/oeb/polish/check/parsing.py
@@ -106,18 +106,23 @@ class NamedEntities(BaseError):
        return changed


+def make_filename_safe(name):
+    from calibre.utils.filenames import ascii_filename
+
+    def esc(n):
+        return ''.join(x if x in URL_SAFE else '_' for x in n)
+    return '/'.join(esc(ascii_filename(x)) for x in name.split('/'))
+
+
 class EscapedName(BaseError):

    level = WARN

    def __init__(self, name):
-        from calibre.utils.filenames import ascii_filename
        BaseError.__init__(self, _('Filename contains unsafe characters'), name)
        qname = urlquote(name)

-        def esc(n):
-            return ''.join(x if x in URL_SAFE else '_' for x in n)
-        self.sname = '/'.join(esc(ascii_filename(x)) for x in name.split('/'))
+        self.sname = make_filename_safe(name)
        self.HELP = _(
            'The filename {0} contains unsafe characters, that must be escaped, like'
            ' this {1}. This can cause problems with some ebook readers. To be'
--- a/src/calibre/ebooks/oeb/polish/download.py
+++ b/src/calibre/ebooks/oeb/polish/download.py
@@ -88,6 +88,14 @@ class ProgressTracker(object):
        return ret


+def sanitize_file_name(x):
+    from calibre.ebooks.oeb.polish.check.parsing import make_filename_safe
+    x = sanitize_file_name2(x)
+    while '..' in x:
+        x = x.replace('..', '.')
+    return make_filename_safe(x)
+
+
 def download_one(tdir, timeout, progress_report, url):
    try:
        purl = urlparse(url)
@@ -104,13 +112,13 @@ def download_one(tdir, timeout, progress_report, url):
            dest = ProgressTracker(df, url, sz, progress_report)
            with closing(src):
                shutil.copyfileobj(src, dest)
-            filename = sanitize_file_name2(filename)
+            filename = sanitize_file_name(filename)
            mt = guess_type(filename)
            if mt in OEB_DOCS:
                raise ValueError('The external resource {} looks like a HTML document ({})'.format(url, filename))
            if not mt or mt == 'application/octet-stream' or '.' not in filename:
                raise ValueError('The external resource {} is not of a known type'.format(url))
-            return True, (url, sanitize_file_name2(filename), dest.name, mt)
+            return True, (url, filename, dest.name, mt)
    except Exception as err:
        return False, (url, as_unicode(err))

--- a/src/calibre/web/__init__.py
+++ b/src/calibre/web/__init__.py
@@ -12,7 +12,7 @@ def get_download_filename_from_response(response):
    filename = last_part_name = ''
    try:
        purl = urlparse(response.geturl())
-        last_part_name = purl.path.split('/')[-1]
+        last_part_name = urllib2_unquote(purl.path.split('/')[-1])
        disposition = response.info().get('Content-disposition', '')
        for p in disposition.split(';'):
            if 'filename' in p:
@@ -58,4 +58,3 @@ def get_download_filename(url, cookie_file=None):
        traceback.print_exc()

    return filename
-