Edit Book: When downloading external resources, ensure the generated filenames are valid. Fixes #1639448 [download external resorces for dificult url make broken links to files](https://bugs.launchpad.net/calibre/+bug/1639448)

This commit is contained in:
Kovid Goyal 2016-11-06 10:55:45 +05:30
parent f519b40778
commit 082b1593d6
3 changed files with 20 additions and 8 deletions

View File

@ -106,18 +106,23 @@ class NamedEntities(BaseError):
return changed
def make_filename_safe(name):
    """Return ``name`` with each '/'-separated component made safe.

    Every component is passed through ``ascii_filename`` and then any
    character that is not a member of ``URL_SAFE`` is replaced with an
    underscore. The '/' separators themselves are preserved.
    """
    from calibre.utils.filenames import ascii_filename

    safe_parts = []
    for component in name.split('/'):
        ascii_component = ascii_filename(component)
        # Substitute '_' for anything outside the URL-safe character set.
        safe_parts.append(''.join(
            ch if ch in URL_SAFE else '_' for ch in ascii_component))
    return '/'.join(safe_parts)
class EscapedName(BaseError):
level = WARN
def __init__(self, name):
from calibre.utils.filenames import ascii_filename
BaseError.__init__(self, _('Filename contains unsafe characters'), name)
qname = urlquote(name)
def esc(n):
return ''.join(x if x in URL_SAFE else '_' for x in n)
self.sname = '/'.join(esc(ascii_filename(x)) for x in name.split('/'))
self.sname = make_filename_safe(name)
self.HELP = _(
'The filename {0} contains unsafe characters, that must be escaped, like'
' this {1}. This can cause problems with some ebook readers. To be'

View File

@ -88,6 +88,14 @@ class ProgressTracker(object):
return ret
def sanitize_file_name(x):
    """Return a sanitized form of the file name ``x``.

    The name is first run through ``sanitize_file_name2``, then every run of
    consecutive dots is collapsed to a single dot, and finally the result is
    handed to ``make_filename_safe``.
    """
    from calibre.ebooks.oeb.polish.check.parsing import make_filename_safe

    cleaned = sanitize_file_name2(x)
    # Repeat the replacement until no '..' sequence is left in the name.
    while True:
        collapsed = cleaned.replace('..', '.')
        if collapsed == cleaned:
            break
        cleaned = collapsed
    return make_filename_safe(cleaned)
def download_one(tdir, timeout, progress_report, url):
try:
purl = urlparse(url)
@ -104,13 +112,13 @@ def download_one(tdir, timeout, progress_report, url):
dest = ProgressTracker(df, url, sz, progress_report)
with closing(src):
shutil.copyfileobj(src, dest)
filename = sanitize_file_name2(filename)
filename = sanitize_file_name(filename)
mt = guess_type(filename)
if mt in OEB_DOCS:
raise ValueError('The external resource {} looks like a HTML document ({})'.format(url, filename))
if not mt or mt == 'application/octet-stream' or '.' not in filename:
raise ValueError('The external resource {} is not of a known type'.format(url))
return True, (url, sanitize_file_name2(filename), dest.name, mt)
return True, (url, filename, dest.name, mt)
except Exception as err:
return False, (url, as_unicode(err))

View File

@ -12,7 +12,7 @@ def get_download_filename_from_response(response):
filename = last_part_name = ''
try:
purl = urlparse(response.geturl())
last_part_name = purl.path.split('/')[-1]
last_part_name = urllib2_unquote(purl.path.split('/')[-1])
disposition = response.info().get('Content-disposition', '')
for p in disposition.split(';'):
if 'filename' in p:
@ -58,4 +58,3 @@ def get_download_filename(url, cookie_file=None):
traceback.print_exc()
return filename