mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Google Images cover download plugin for website changes. Fixes #2069553 [Version 7.12 no longer downloading cover images from all sources](https://bugs.launchpad.net/calibre/+bug/2069553)
This commit is contained in:
parent
0d611c65f7
commit
43121af37d
@ -43,10 +43,26 @@ def imgurl_from_id(raw, tbnid):
|
|||||||
return q
|
return q
|
||||||
|
|
||||||
|
|
||||||
|
def parse_google_markup(raw):
    """Return the image URLs found in a Google Images results page.

    ``raw`` is the page markup; it is parsed with ``parse_html`` and also
    handed unchanged to ``imgurl_from_id`` for every thumbnail id found.
    The result is a list of URLs without duplicates, in the order in which
    they were first encountered.
    """
    tree = parse_html(raw)
    # newer markup pages use data-docid not data-tbnid
    image_ids = tree.xpath('//div/@data-tbnid')
    if not image_ids:
        image_ids = tree.xpath('//div/@data-docid')
    # An OrderedDict keyed by URL acts as an insertion-ordered set
    seen = OrderedDict()
    for image_id in image_ids:
        try:
            url = imgurl_from_id(raw, image_id)
        except Exception:
            # Best effort: an id that cannot be resolved is simply skipped
            url = None
        if url:
            seen[url] = True
    return list(seen)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleImages(Source):
|
class GoogleImages(Source):
|
||||||
|
|
||||||
name = 'Google Images'
|
name = 'Google Images'
|
||||||
version = (1, 0, 5)
|
version = (1, 0, 6)
|
||||||
minimum_calibre_version = (2, 80, 0)
|
minimum_calibre_version = (2, 80, 0)
|
||||||
description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.')
|
description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.')
|
||||||
capabilities = frozenset(['cover'])
|
capabilities = frozenset(['cover'])
|
||||||
@ -88,8 +104,6 @@ class GoogleImages(Source):
|
|||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from collections import OrderedDict
|
|
||||||
ans = OrderedDict()
|
|
||||||
br = self.browser
|
br = self.browser
|
||||||
q = urlencode({'as_q': ('%s %s'%(title, author)).encode('utf-8')})
|
q = urlencode({'as_q': ('%s %s'%(title, author)).encode('utf-8')})
|
||||||
if isinstance(q, bytes):
|
if isinstance(q, bytes):
|
||||||
@ -116,21 +130,17 @@ class GoogleImages(Source):
|
|||||||
raw = clean_ascii_chars(br.open(url).read().decode('utf-8'))
|
raw = clean_ascii_chars(br.open(url).read().decode('utf-8'))
|
||||||
# with open('/t/raw.html', 'w') as f:
|
# with open('/t/raw.html', 'w') as f:
|
||||||
# f.write(raw)
|
# f.write(raw)
|
||||||
root = parse_html(raw)
|
return parse_google_markup(raw)
|
||||||
results = root.xpath('//div/@data-tbnid') # could also use data-id
|
|
||||||
# from calibre.utils.ipython import ipython
|
|
||||||
# ipython({'root': root, 'raw': raw, 'url': url, 'results': results})
|
|
||||||
for tbnid in results:
|
|
||||||
try:
|
|
||||||
imgurl = imgurl_from_id(raw, tbnid)
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
if imgurl:
|
|
||||||
ans[imgurl] = True
|
|
||||||
return list(ans)
|
|
||||||
|
|
||||||
|
|
||||||
def test():
|
def test_raw():
    """Print every image URL parsed from a saved results page.

    The page is read from the file named by the last command-line
    argument and passed to ``parse_google_markup``; each resulting URL is
    printed on its own line.
    """
    import sys
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(sys.argv[-1]) as f:
        raw = f.read()
    for x in parse_google_markup(raw):
        print(x)
|
||||||
|
|
||||||
|
|
||||||
|
def test(title='Star Trek: Section 31: Control', authors=('David Mack',)):
|
||||||
try:
|
try:
|
||||||
from queue import Queue
|
from queue import Queue
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -141,9 +151,9 @@ def test():
|
|||||||
p = GoogleImages(None)
|
p = GoogleImages(None)
|
||||||
p.log = default_log
|
p.log = default_log
|
||||||
rq = Queue()
|
rq = Queue()
|
||||||
p.download_cover(default_log, rq, Event(), title='The Heroes',
|
p.download_cover(default_log, rq, Event(), title=title, authors=authors)
|
||||||
authors=('Joe Abercrombie',))
|
|
||||||
print('Downloaded', rq.qsize(), 'covers')
|
print('Downloaded', rq.qsize(), 'covers')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
test()
|
test()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user