From 9a6d284cf438b52878c932fd361b72a02d86c61f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 10 Sep 2021 18:23:42 +0530
Subject: [PATCH] Convert bytes urls to strings when canonicalizing internal
 urls

---
 src/calibre/web/feeds/news.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 898f25a589..ba7544a758 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -681,7 +681,13 @@ class BasicNewsRecipe(Recipe):
         except Exception:
             self.log.error('Failed to parse url: %r, ignoring' % url)
             return frozenset()
-        return frozenset([(parts.netloc, (parts.path or '').rstrip('/'))])
+        nl = parts.netloc
+        path = parts.path or ''
+        if isinstance(nl, bytes):
+            nl = nl.decode('utf-8', 'replace')
+        if isinstance(path, bytes):
+            path = path.decode('utf-8', 'replace')
+        return frozenset({(nl, path.rstrip('/'))})
 
     def index_to_soup(self, url_or_raw, raw=False, as_tree=False, save_raw=None):
         '''