News download: Fix percent escaping of URLs with spaces in them not working if the URL contains non-ascii characters

This commit is contained in:
Kovid Goyal 2021-08-29 13:21:46 +05:30
parent 289e81c2de
commit 803c3826fd
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -29,7 +29,7 @@ from calibre.utils.img import image_from_data, image_to_data
 from calibre.utils.imghdr import what
 from calibre.utils.logging import Log
 from calibre.web.fetch.utils import rescale_image
-from polyglot.builtins import as_bytes, unicode_type
+from polyglot.builtins import unicode_type
 from polyglot.http_client import responses
 from polyglot.urllib import (
     URLError, quote, url2pathname, urljoin, urlparse, urlsplit, urlunparse,
@@ -65,12 +65,10 @@ class closing(object):
 def canonicalize_url(url):
     # mechanize does not handle quoting automatically
     if re.search(r'\s+', url) is not None:
-        if isinstance(url, unicode_type):
-            url = url.encode('utf-8')
         purl = list(urlparse(url))
         for i in range(2, 6):
-            purl[i] = as_bytes(quote(purl[i]))
-        url = urlunparse(purl).decode('utf-8')
+            purl[i] = quote(purl[i])
+        url = urlunparse(purl)
     return url