diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 971f52a4e5..ca81b139e4 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -103,7 +103,7 @@ def save_soup(soup, target): f.write(html.encode('utf-8')) -class response(str): +class response(bytes): def __new__(cls, *args): obj = super(response, cls).__new__(cls, *args) @@ -261,16 +261,13 @@ class RecursiveFetcher(object): delta = time.time() - self.last_fetch_at if delta < self.delay: time.sleep(self.delay - delta) - if isinstance(url, unicode_type): - url = url.encode('utf-8') - # Not sure is this is really needed as I think mechanize - # handles quoting automatically, but leaving it - # in case it breaks something + # mechanize does not handle quoting automatically if re.search(r'\s+', url) is not None: + url = url.encode('utf-8') purl = list(urlparse(url)) for i in range(2, 6): purl[i] = quote(purl[i]) - url = urlunparse(purl) + url = urlunparse(purl).decode('utf-8') open_func = getattr(self.browser, 'open_novisit', self.browser.open) try: with closing(open_func(url, timeout=self.timeout)) as f: @@ -414,8 +411,6 @@ class RecursiveFetcher(object): continue c += 1 fname = ascii_filename('img'+str(c)) - if isinstance(fname, unicode_type): - fname = fname.encode('ascii', 'replace') data = self.preprocess_image_ext(data, iurl) if self.preprocess_image_ext is not None else data if data is None: continue @@ -520,7 +515,7 @@ class RecursiveFetcher(object): dsrc = self.fetch_url(iurl) newbaseurl = dsrc.newurl if len(dsrc) == 0 or \ - len(re.compile('', re.DOTALL).sub('', dsrc).strip()) == 0: + len(re.compile(b'', re.DOTALL).sub(b'', dsrc).strip()) == 0: raise ValueError('No content at URL %r'%iurl) if callable(self.encoding): dsrc = self.encoding(dsrc) @@ -544,7 +539,7 @@ class RecursiveFetcher(object): _fname = basename(iurl) if not isinstance(_fname, unicode_type): _fname.decode('latin1', 'replace') - _fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '') + _fname = _fname.replace('%', '').replace(os.sep, '').encode('ascii', 'replace') _fname = ascii_filename(_fname) _fname = os.path.splitext(_fname)[0][:120] + '.xhtml' res = os.path.join(linkdiskpath, _fname)