Metadata download: Fix downloads from Google not working in Europe. Fixes #2043415 [downloading covers from Google Images and Open Library fails](https://bugs.launchpad.net/calibre/+bug/2043415)

This commit is contained in:
Kovid Goyal 2023-11-14 21:35:03 +05:30
parent 148a0792a2
commit f9c7572b88
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 11 additions and 5 deletions

View File

@ -46,7 +46,7 @@ def imgurl_from_id(raw, tbnid):
class GoogleImages(Source):
name = 'Google Images'
version = (1, 0, 3)
version = (1, 0, 4)
minimum_calibre_version = (2, 80, 0)
description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.')
capabilities = frozenset(['cover'])
@ -105,8 +105,13 @@ class GoogleImages(Source):
# URL scheme
url = 'https://www.google.com/search?as_st=y&tbm=isch&{}&as_epq=&as_oq=&as_eq=&cr=&as_sitesearch=&safe=images&tbs={}iar:t,ift:jpg'.format(q, sz)
log('Search URL: ' + url)
br.set_simple_cookie('CONSENT', 'YES+', '.google.com', path='/')
# See https://github.com/benbusby/whoogle-search/pull/1054 for cookies
br.set_simple_cookie('CONSENT', 'PENDING+987', '.google.com', path='/')
br.set_simple_cookie('SOCS','CAESHAgBEhJnd3NfMjAyMzA4MTAtMF9SQzIaAmRlIAEaBgiAo_CmBg', '.google.com', path='/')
# br.set_debug_http(True)
raw = clean_ascii_chars(br.open(url).read().decode('utf-8'))
# with open('/t/raw.html', 'w') as f:
# f.write(raw)
root = parse_html(raw)
results = root.xpath('//div/@data-tbnid') # could also use data-id
# from calibre.utils.ipython import ipython
@ -135,6 +140,5 @@ def test():
authors=('Joe Abercrombie',))
print('Downloaded', rq.qsize(), 'covers')
if __name__ == '__main__':
test()

View File

@ -27,7 +27,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.lock import ExclusiveFile
from calibre.utils.random_ua import accept_header_for_ua
current_version = (1, 2, 2)
current_version = (1, 2, 3)
minimum_calibre_version = (2, 80, 0)
webcache = {}
webcache_lock = Lock()
@ -368,7 +368,9 @@ def google_parse_results(root, raw, log=prints, ignore_uncached=True):
def google_specialize_browser(br):
with webcache_lock:
if not hasattr(br, 'google_consent_cookie_added'):
br.set_simple_cookie('CONSENT', 'YES+', '.google.com', path='/')
# See https://github.com/benbusby/whoogle-search/pull/1054 for cookies
br.set_simple_cookie('CONSENT', 'PENDING+987', '.google.com', path='/')
br.set_simple_cookie('SOCS','CAESHAgBEhJnd3NfMjAyMzA4MTAtMF9SQzIaAmRlIAEaBgiAo_CmBg', '.google.com', path='/')
br.google_consent_cookie_added = True
return br