From f07335f107a5372794fb26d4149112c36551839b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 26 Sep 2024 12:36:32 +0530 Subject: [PATCH] Function to get bing cached version of URL --- .../ebooks/metadata/sources/search_engines.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/search_engines.py b/src/calibre/ebooks/metadata/sources/search_engines.py index 84db490be1..b50cd231ca 100644 --- a/src/calibre/ebooks/metadata/sources/search_engines.py +++ b/src/calibre/ebooks/metadata/sources/search_engines.py @@ -217,6 +217,12 @@ def bing_url_processor(url): return url +def bing_cached_url(url): + results, search_url = bing_search(['url:' + url]) + for result in results: + return result.cached_url + + def resolve_bing_wrapper_page(url, br, log): raw = br.open_novisit(url).read().decode('utf-8', 'replace') m = re.search(r'var u = "(.+)"', raw) @@ -259,10 +265,10 @@ def bing_search(terms, site=None, br=None, log=prints, safe_search=False, dump_r d, w = div.get('u').split('|')[-2:] cached_url = 'https://cc.bingj.com/cache.aspx?q={q}&d={d}&mkt=en-US&setlang=en-US&w={w}'.format( q=q, d=d, w=w) - url = a.get('href') - if url.startswith('https://www.bing.com/'): - url = resolve_bing_wrapper_page(url, br, log) - ans.append(Result(url, title, cached_url)) + ans_url = a.get('href') + if ans_url.startswith('https://www.bing.com/'): + ans_url = resolve_bing_wrapper_page(ans_url, br, log) + ans.append(Result(ans_url, title, cached_url)) if not ans: title = ' '.join(root.xpath('//title/text()')) log('Failed to find any results on results page, with title:', title)