Update Google Images cover download plugin for website markup changes

Fixes #1862034 [when editing metadata covers are not fully downloading](https://bugs.launchpad.net/calibre/+bug/1862034)
This commit is contained in:
Kovid Goyal 2020-02-06 07:53:15 +05:30
parent 5f3d3b2ef8
commit cf65226e6b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -23,10 +23,30 @@ def parse_html(raw):
return parse(raw) return parse(raw)
def imgurl_from_id(raw, tbnid):
    """Return an image URL associated with *tbnid* inside *raw*, or None.

    The text ``"<tbnid>",[`` is located in *raw* and the JSON array that
    encloses it is decoded (by re-opening the array with a ``[`` in front of
    the match). The decoded items are scanned for three-element lists whose
    first element is a string starting with ``http`` (case-insensitive); the
    second such string is returned.

    :param raw: Page source to search.
    :param tbnid: Identifier to look for in *raw*.
    :return: The second matching URL, or None when the match lies within the
        first 100 characters of *raw* or fewer than two URLs are found.
    :raises ValueError: If the identifier is not present in *raw*, or the
        data following it cannot be decoded as JSON.
    """
    from json import JSONDecoder

    needle = '"{}",['.format(tbnid)
    # str.index (not find) is deliberate: a missing id raises ValueError.
    offset = raw.index(needle)
    if offset < 100:
        # NOTE(review): presumably a match this early cannot be the real
        # data entry -- confirm the reason for the 100 character threshold.
        return None
    # raw_decode tolerates trailing text after the first complete JSON value.
    payload, _ = JSONDecoder().raw_decode('[' + raw[offset:])
    urls_seen = 0
    for entry in payload:
        if not isinstance(entry, list) or len(entry) != 3:
            continue
        candidate = entry[0]
        if not hasattr(candidate, 'lower'):
            continue
        if not candidate.lower().startswith('http'):
            continue
        urls_seen += 1
        if urls_seen > 1:
            return candidate
    return None
class GoogleImages(Source): class GoogleImages(Source):
name = 'Google Images' name = 'Google Images'
version = (1, 0, 1) version = (1, 0, 2)
minimum_calibre_version = (2, 80, 0) minimum_calibre_version = (2, 80, 0)
description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.') description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.')
capabilities = frozenset(['cover']) capabilities = frozenset(['cover'])
@ -68,7 +88,6 @@ class GoogleImages(Source):
from urllib.parse import urlencode from urllib.parse import urlencode
except ImportError: except ImportError:
from urllib import urlencode from urllib import urlencode
import json
from collections import OrderedDict from collections import OrderedDict
ans = OrderedDict() ans = OrderedDict()
br = self.browser br = self.browser
@ -88,13 +107,16 @@ class GoogleImages(Source):
log('Search URL: ' + url) log('Search URL: ' + url)
raw = clean_ascii_chars(br.open(url).read().decode('utf-8')) raw = clean_ascii_chars(br.open(url).read().decode('utf-8'))
root = parse_html(raw) root = parse_html(raw)
for div in root.xpath('//div[@class="rg_meta notranslate"]'): results = root.xpath('//div/@data-tbnid') # could also use data-id
# from calibre.utils.ipython import ipython
# ipython({'root': root, 'raw': raw, 'url': url, 'results': results})
for tbnid in results:
try: try:
data = json.loads(div.text) imgurl = imgurl_from_id(raw, tbnid)
except Exception: except Exception:
continue continue
if 'ou' in data: if imgurl:
ans[data['ou']] = True ans[imgurl] = True
return list(ans) return list(ans)