News download: Correctly handle URLs with non-ASCII characters in them

2025-08-30 23:00:21 -04:00 · 2009-12-24 12:26:13 -07:00 · 2009-12-24 12:26:13 -07:00 · 6ae6e2ffed
commit 6ae6e2ffed
parent b0eb97c60a
1 changed file with 6 additions and 5 deletions
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -188,11 +188,12 @@ class RecursiveFetcher(object):
        delta = time.time() - self.last_fetch_at
        if  delta < self.delay:
            time.sleep(delta)
-        if re.search(r'\s+|,', url) is not None:
+        if isinstance(url, unicode):
-            purl = list(urlparse.urlparse(url))
+            url = url.encode('utf-8')
-            for i in range(2, 6):
+        purl = list(urlparse.urlparse(url))
-                purl[i] = quote(purl[i])
+        for i in range(2, 6):
-            url = urlparse.urlunparse(purl)
+            purl[i] = quote(purl[i])
+        url = urlparse.urlunparse(purl)
        try:
            open_func = getattr(self.browser, 'open_novisit', self.browser.open)
            with closing(open_func(url, timeout=self.timeout)) as f: