mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Update Google Images cover download plugin for website markup changes
Fixes #1862034 [when editing metadata covers are not fully downloading](https://bugs.launchpad.net/calibre/+bug/1862034)
This commit is contained in:
parent
5f3d3b2ef8
commit
cf65226e6b
@@ -23,10 +23,30 @@ def parse_html(raw):
|
|||||||
return parse(raw)
|
return parse(raw)
|
||||||
|
|
||||||
|
|
||||||
|
def imgurl_from_id(raw, tbnid):
|
||||||
|
from json import JSONDecoder
|
||||||
|
q = '"{}",['.format(tbnid)
|
||||||
|
start_pos = raw.index(q)
|
||||||
|
if start_pos < 100:
|
||||||
|
return
|
||||||
|
jd = JSONDecoder()
|
||||||
|
data = jd.raw_decode('[' + raw[start_pos:])[0]
|
||||||
|
# from pprint import pprint
|
||||||
|
# pprint(data)
|
||||||
|
url_num = 0
|
||||||
|
for x in data:
|
||||||
|
if isinstance(x, list) and len(x) == 3:
|
||||||
|
q = x[0]
|
||||||
|
if hasattr(q, 'lower') and q.lower().startswith('http'):
|
||||||
|
url_num += 1
|
||||||
|
if url_num > 1:
|
||||||
|
return q
|
||||||
|
|
||||||
|
|
||||||
class GoogleImages(Source):
|
class GoogleImages(Source):
|
||||||
|
|
||||||
name = 'Google Images'
|
name = 'Google Images'
|
||||||
version = (1, 0, 1)
|
version = (1, 0, 2)
|
||||||
minimum_calibre_version = (2, 80, 0)
|
minimum_calibre_version = (2, 80, 0)
|
||||||
description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.')
|
description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.')
|
||||||
capabilities = frozenset(['cover'])
|
capabilities = frozenset(['cover'])
|
||||||
@@ -68,7 +88,6 @@ class GoogleImages(Source):
|
|||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
import json
|
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
ans = OrderedDict()
|
ans = OrderedDict()
|
||||||
br = self.browser
|
br = self.browser
|
||||||
@@ -88,13 +107,16 @@ class GoogleImages(Source):
|
|||||||
log('Search URL: ' + url)
|
log('Search URL: ' + url)
|
||||||
raw = clean_ascii_chars(br.open(url).read().decode('utf-8'))
|
raw = clean_ascii_chars(br.open(url).read().decode('utf-8'))
|
||||||
root = parse_html(raw)
|
root = parse_html(raw)
|
||||||
for div in root.xpath('//div[@class="rg_meta notranslate"]'):
|
results = root.xpath('//div/@data-tbnid') # could also use data-id
|
||||||
|
# from calibre.utils.ipython import ipython
|
||||||
|
# ipython({'root': root, 'raw': raw, 'url': url, 'results': results})
|
||||||
|
for tbnid in results:
|
||||||
try:
|
try:
|
||||||
data = json.loads(div.text)
|
imgurl = imgurl_from_id(raw, tbnid)
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
if 'ou' in data:
|
if imgurl:
|
||||||
ans[data['ou']] = True
|
ans[imgurl] = True
|
||||||
return list(ans)
|
return list(ans)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user