Convert bytes URLs to strings when canonicalizing internal URLs

This commit is contained in:
Kovid Goyal 2021-09-10 18:23:42 +05:30
parent 78822589d8
commit 9a6d284cf4
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -681,7 +681,13 @@ class BasicNewsRecipe(Recipe):
except Exception: except Exception:
self.log.error('Failed to parse url: %r, ignoring' % url) self.log.error('Failed to parse url: %r, ignoring' % url)
return frozenset() return frozenset()
return frozenset([(parts.netloc, (parts.path or '').rstrip('/'))]) nl = parts.netloc
path = parts.path or ''
if isinstance(nl, bytes):
nl = nl.decode('utf-8', 'replace')
if isinstance(path, bytes):
path = path.decode('utf-8', 'replace')
return frozenset({(nl, path.rstrip('/'))})
def index_to_soup(self, url_or_raw, raw=False, as_tree=False, save_raw=None): def index_to_soup(self, url_or_raw, raw=False, as_tree=False, save_raw=None):
''' '''