Update Google Images cover download plugin for website markup changes

Fixes #1862034 [when editing metadata covers are not fully downloading](https://bugs.launchpad.net/calibre/+bug/1862034)
2025-07-07 10:14:46 -04:00 · 2020-02-06 07:53:15 +05:30 · 2020-02-06 07:53:15 +05:30 · cf65226e6b
commit cf65226e6b
parent 5f3d3b2ef8
1 changed files with 28 additions and 6 deletions
--- a/src/calibre/ebooks/metadata/sources/google_images.py
+++ b/src/calibre/ebooks/metadata/sources/google_images.py
@ -23,10 +23,30 @@ def parse_html(raw):
        return parse(raw)
 def imgurl_from_id(raw, tbnid):
     """Look up an image URL for *tbnid* inside the raw page text.

     The text is searched for the literal ``"<tbnid>",[``. From that point
     on, the content is decoded as a JSON array (a ``[`` is prepended so the
     id itself becomes the first element; anything after the end of the
     array is ignored).

     Among the array's items that are three-element lists whose first
     element is a string starting with ``http`` (case-insensitive), the
     first element of the *second* such item is returned.
     NOTE(review): presumably the first match is a thumbnail and the second
     the full-size image -- confirm against the current page markup.

     Returns ``None`` when the marker occurs within the first 100
     characters of *raw* or when fewer than two matching items exist.

     Raises ``ValueError`` if the marker is not present in *raw* or the
     data following it cannot be decoded as JSON.
     """
     from json import JSONDecoder

     marker = '"{}",['.format(tbnid)
     offset = raw.index(marker)
     if offset < 100:
         return None

     payload, _ = JSONDecoder().raw_decode('[' + raw[offset:])

     # Lazily yield the leading element of each [url, ?, ?] style entry.
     http_urls = (
         entry[0]
         for entry in payload
         if isinstance(entry, list)
         and len(entry) == 3
         and hasattr(entry[0], 'lower')
         and entry[0].lower().startswith('http')
     )
     next(http_urls, None)  # discard the first matching URL
     return next(http_urls, None)
 class GoogleImages(Source):
    name = 'Google Images'
-    version = (1, 0, 1)
+    version = (1, 0, 2)
    minimum_calibre_version = (2, 80, 0)
    description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.')
    capabilities = frozenset(['cover'])
@ -68,7 +88,6 @@ class GoogleImages(Source):
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode
        import json
        from collections import OrderedDict
        ans = OrderedDict()
        br = self.browser
@ -88,13 +107,16 @@ class GoogleImages(Source):
        log('Search URL: ' + url)
        raw = clean_ascii_chars(br.open(url).read().decode('utf-8'))
        root = parse_html(raw)
-        for div in root.xpath('//div[@class="rg_meta notranslate"]'):
+        results = root.xpath('//div/@data-tbnid')  # could also use data-id
        # from calibre.utils.ipython import ipython
        # ipython({'root': root, 'raw': raw, 'url': url, 'results': results})
        for tbnid in results:
            try:
-                data = json.loads(div.text)
+                imgurl = imgurl_from_id(raw, tbnid)
            except Exception:
                continue
-            if 'ou' in data:
+            if imgurl:
-                ans[data['ou']] = True
+                ans[imgurl] = True
        return list(ans)