From 6ae6e2ffedc82fd96f286932a917c00626c64754 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 24 Dec 2009 12:26:13 -0700 Subject: [PATCH] News download: Correctly handle URLs with non ASCII characters in them --- src/calibre/web/fetch/simple.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 4936feb77f..a5c530c38b 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -188,11 +188,12 @@ class RecursiveFetcher(object): delta = time.time() - self.last_fetch_at if delta < self.delay: time.sleep(delta) - if re.search(r'\s+|,', url) is not None: - purl = list(urlparse.urlparse(url)) - for i in range(2, 6): - purl[i] = quote(purl[i]) - url = urlparse.urlunparse(purl) + if isinstance(url, unicode): + url = url.encode('utf-8') + purl = list(urlparse.urlparse(url)) + for i in range(2, 6): + purl[i] = quote(purl[i]) + url = urlparse.urlunparse(purl) try: open_func = getattr(self.browser, 'open_novisit', self.browser.open) with closing(open_func(url, timeout=self.timeout)) as f: