Merge branch 'py3' of https://github.com/eli-schwartz/calibre

2025-07-09 03:04:10 -04:00 · 2019-04-23 15:55:08 +05:30 · 2019-04-23 15:55:08 +05:30 · bfbc31fa9f
commit bfbc31fa9f
parent 84d939fb06 b9224f17c4
1 changed files with 7 additions and 11 deletions
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -103,7 +103,7 @@ def save_soup(soup, target):
        f.write(html.encode('utf-8'))


-class response(str):
+class response(bytes):

    def __new__(cls, *args):
        obj = super(response, cls).__new__(cls, *args)
@@ -261,16 +261,14 @@ class RecursiveFetcher(object):
        delta = time.time() - self.last_fetch_at
        if delta < self.delay:
            time.sleep(self.delay - delta)
+        # mechanize does not handle quoting automatically
+        if re.search(r'\s+', url) is not None:
            if isinstance(url, unicode_type):
                url = url.encode('utf-8')
-        # Not sure is this is really needed as I think mechanize
-        # handles quoting automatically, but leaving it
-        # in case it breaks something
-        if re.search(r'\s+', url) is not None:
            purl = list(urlparse(url))
            for i in range(2, 6):
                purl[i] = quote(purl[i])
-            url = urlunparse(purl)
+            url = urlunparse(purl).decode('utf-8')
        open_func = getattr(self.browser, 'open_novisit', self.browser.open)
        try:
            with closing(open_func(url, timeout=self.timeout)) as f:
@@ -414,8 +412,6 @@ class RecursiveFetcher(object):
                    continue
            c += 1
            fname = ascii_filename('img'+str(c))
-            if isinstance(fname, unicode_type):
-                fname = fname.encode('ascii', 'replace')
            data = self.preprocess_image_ext(data, iurl) if self.preprocess_image_ext is not None else data
            if data is None:
                continue
@@ -520,7 +516,7 @@ class RecursiveFetcher(object):
                    dsrc = self.fetch_url(iurl)
                    newbaseurl = dsrc.newurl
                    if len(dsrc) == 0 or \
-                       len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
+                       len(re.compile(b'<!--.*?-->', re.DOTALL).sub(b'', dsrc).strip()) == 0:
                        raise ValueError('No content at URL %r'%iurl)
                    if callable(self.encoding):
                        dsrc = self.encoding(dsrc)
@@ -544,7 +540,7 @@ class RecursiveFetcher(object):
                    _fname = basename(iurl)
                    if not isinstance(_fname, unicode_type):
                        _fname.decode('latin1', 'replace')
-                    _fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
+                    _fname = _fname.replace('%', '').replace(os.sep, '')
                    _fname = ascii_filename(_fname)
                    _fname = os.path.splitext(_fname)[0][:120] + '.xhtml'
                    res = os.path.join(linkdiskpath, _fname)