Conversion pipeline: Ignore links in the HTML that have quoted non-ASCII characters, since there is no way to decode them correctly. Fixes #5354 (Failure to download "El Pais" X3 weeks)

This commit is contained in:
Kovid Goyal 2010-04-23 11:55:12 -06:00
parent f5371fb138
commit 20ac15a4a8

View File

@@ -430,7 +430,10 @@ class DirContainer(object):
return f.write(data)
def exists(self, path):
    """Return True if ``path`` names an existing regular file in the container.

    ``path`` is URL-unquoted exactly once and joined onto ``self.rootdir``
    before the filesystem check.

    :param path: container-relative, URL-quoted path.
    :return: ``True`` when the resolved path is a regular file; ``False``
        when it is not, or when the path cannot be unquoted/joined.
    """
    try:
        # Unquote only once, and only inside the guard: a second pass would
        # corrupt names that legitimately contain an escaped '%'.
        path = os.path.join(self.rootdir, urlunquote(path))
    except ValueError:  # Happens if path contains quoted special chars
        # Such links cannot be decoded correctly, so treat them as missing
        # instead of aborting the caller.
        return False
    return os.path.isfile(path)
def namelist(self):