News download: Correctly handle URLs with non-ASCII characters in them

This commit is contained in:
Kovid Goyal 2009-12-24 12:26:13 -07:00
parent b0eb97c60a
commit 6ae6e2ffed

View File

@ -188,11 +188,12 @@ class RecursiveFetcher(object):
delta = time.time() - self.last_fetch_at delta = time.time() - self.last_fetch_at
if delta < self.delay: if delta < self.delay:
time.sleep(delta) time.sleep(delta)
if re.search(r'\s+|,', url) is not None: if isinstance(url, unicode):
purl = list(urlparse.urlparse(url)) url = url.encode('utf-8')
for i in range(2, 6): purl = list(urlparse.urlparse(url))
purl[i] = quote(purl[i]) for i in range(2, 6):
url = urlparse.urlunparse(purl) purl[i] = quote(purl[i])
url = urlparse.urlunparse(purl)
try: try:
open_func = getattr(self.browser, 'open_novisit', self.browser.open) open_func = getattr(self.browser, 'open_novisit', self.browser.open)
with closing(open_func(url, timeout=self.timeout)) as f: with closing(open_func(url, timeout=self.timeout)) as f: