diff --git a/setup/plugins_mirror.py b/setup/plugins_mirror.py
index aac0c10fc4..e17e648c92 100644
--- a/setup/plugins_mirror.py
+++ b/setup/plugins_mirror.py
@@ -124,7 +124,7 @@ def parse_index(raw=None): # {{{
return category
raise ValueError('Could not find category for offset: ' + str(offset))
- for match in re.finditer(r'''(?is)
(.+?)<(.+?)''', raw):
+ for match in re.finditer(r'''(?is)(.+?)<(.+?)''', raw):
name, url, rest = u(match.group(2)), u(match.group(1)), match.group(3)
category = category_at(match.start(2))
deprecated = category == deprecated_category
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 8777babc88..7c8af1caea 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -398,6 +398,7 @@ class GoogleBooks(Source):
timeout=30
):
from calibre.utils.filenames import ascii_text
+ from polyglot.urllib import urlparse
isbn = check_isbn(identifiers.get('isbn', None))
q = []
strip_punc_pat = regex.compile(r'[\p{C}|\p{M}|\p{P}|\p{S}|\p{Z}]+', regex.UNICODE)
@@ -440,7 +441,13 @@ class GoogleBooks(Source):
pat = re.compile(r'id=([^&]+)')
for q in se.google_parse_results(root, r[0], log=log, ignore_uncached=False):
m = pat.search(q.url)
- if m is None or not q.url.startswith('https://books.google'):
+ if m is None or not q.url:
+ continue
+ try:
+ purl = urlparse(q.url)
+ except Exception:
+ continue
+ if not (purl.hostname or '').startswith('books.google'):  # hostname never includes the scheme and is None for relative URLs
continue
google_ids.append(m.group(1))
diff --git a/src/calibre/ebooks/metadata/sources/search_engines.py b/src/calibre/ebooks/metadata/sources/search_engines.py
index e36eb19abe..44f409d4b6 100644
--- a/src/calibre/ebooks/metadata/sources/search_engines.py
+++ b/src/calibre/ebooks/metadata/sources/search_engines.py
@@ -336,7 +336,7 @@ def google_parse_results(root, raw, log=prints, ignore_uncached=True):
purl = urlparse(url)
except Exception:
continue
- if 'google.com' in purl.netloc:
+ if ('.' + (purl.hostname or '')).endswith('.google.com'):  # None-safe; matches google.com and its subdomains only, not e.g. notgoogle.com
continue
try:
title = tostring(next(a.iterchildren('span')))