From c9d3d132be3f1e93a015d4bb432b5778c6fb0090 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 5 Aug 2022 07:07:22 +0530
Subject: [PATCH] Google search engine: Fix for different URL encoding used by
 Google servers in Portugal

---
 src/calibre/ebooks/metadata/sources/search_engines.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/sources/search_engines.py b/src/calibre/ebooks/metadata/sources/search_engines.py
index 55e0053d50..23aecb7d6f 100644
--- a/src/calibre/ebooks/metadata/sources/search_engines.py
+++ b/src/calibre/ebooks/metadata/sources/search_engines.py
@@ -25,7 +25,7 @@ from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.lock import ExclusiveFile
 from calibre.utils.random_ua import accept_header_for_ua
 
-current_version = (1, 2, 0)
+current_version = (1, 2, 1)
 minimum_calibre_version = (2, 80, 0)
 webcache = {}
 webcache_lock = Lock()
@@ -289,6 +289,7 @@ def google_extract_cache_urls(raw):
         raw = raw.decode('utf-8', 'replace')
     pat = re.compile(r'\\x22(https://webcache\.googleusercontent\.com/.+?)\\x22')
     upat = re.compile(r'\\\\u([0-9a-fA-F]{4})')
+    xpat = re.compile(r'\\x([0-9a-fA-F]{2})')
     cache_pat = re.compile('cache:([^:]+):(.+)')
 
     def urepl(m):
@@ -298,6 +299,10 @@ def google_extract_cache_urls(raw):
     ans = {}
     for m in pat.finditer(raw):
         cache_url = upat.sub(urepl, m.group(1))
+        # Results from Google servers in Portugal also use \xNN escapes and &amp; in cache URLs, so decode both
+        cache_url = xpat.sub(urepl, cache_url)
+        cache_url = cache_url.replace('&amp;', '&')
+
         m = cache_pat.search(cache_url)
         cache_id, src_url = m.group(1), m.group(2)
         if cache_id in seen: