Set the Referer when accessing cached pages

2025-07-09 03:04:10 -04:00 · 2017-03-02 21:47:52 +05:30 · 2017-03-02 21:47:52 +05:30 · faf46f844f
commit faf46f844f
parent 122a1de44d
2 changed files with 9 additions and 7 deletions
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -1221,7 +1221,9 @@ class Amazon(Source):
        matches = []
        se = search_engines_module()
        urlproc = se.bing_url_processor
-        for result in se.bing_search(terms, site, log=log, br=br, timeout=timeout):
+        results, qurl = se.bing_search(terms, site, log=log, br=br, timeout=timeout)
+        br.set_current_header('Referer', qurl)
+        for result in results:
            if abort.is_set():
                return matches, terms, domain, None

--- a/src/calibre/ebooks/metadata/sources/search_engines.py
+++ b/src/calibre/ebooks/metadata/sources/search_engines.py
@@ -129,12 +129,12 @@ def ddg_search(terms, site=None, br=None, log=prints, safe_search=False, dump_ra
    ans = []
    for a in root.xpath('//*[@class="results"]//*[@class="result__title"]/a[@href and @class="result__a"]'):
        ans.append(Result(ddg_href(a.get('href')), tostring(a), None))
-    return ans
+    return ans, url


 def ddg_develop():
    br = browser()
-    for result in ddg_search('heroes abercrombie'.split(), 'www.amazon.com', dump_raw='/t/raw.html', br=br):
+    for result in ddg_search('heroes abercrombie'.split(), 'www.amazon.com', dump_raw='/t/raw.html', br=br)[0]:
        if '/dp/' in result.url:
            print(result.title)
            print(' ', result.url)
@@ -177,12 +177,12 @@ def bing_search(terms, site=None, br=None, log=prints, safe_search=False, dump_r
        cached_url = 'http://cc.bingj.com/cache.aspx?q={q}&d={d}&mkt=en-US&setlang=en-US&w={w}'.format(
            q=q, d=d, w=w)
        ans.append(Result(a.get('href'), tostring(a), cached_url))
-    return ans
+    return ans, url


 def bing_develop():
    br = browser()
-    for result in bing_search('heroes abercrombie'.split(), 'www.amazon.com', dump_raw='/t/raw.html', br=br):
+    for result in bing_search('heroes abercrombie'.split(), 'www.amazon.com', dump_raw='/t/raw.html', br=br)[0]:
        if '/dp/' in result.url:
            print(result.title)
            print(' ', result.url)
@@ -220,12 +220,12 @@ def google_search(terms, site=None, br=None, log=prints, safe_search=False, dump
        c = div.xpath('descendant::div[@class="s"]//a[@class="fl"]')[0]
        cached_url = c.get('href')
        ans.append(Result(a.get('href'), tostring(a), cached_url))
-    return ans
+    return ans, url


 def google_develop():
    br = browser()
-    for result in google_search('heroes abercrombie'.split(), 'www.amazon.com', dump_raw='/t/raw.html', br=br):
+    for result in google_search('heroes abercrombie'.split(), 'www.amazon.com', dump_raw='/t/raw.html', br=br)[0]:
        if '/dp/' in result.url:
            print(result.title)
            print(' ', result.url)