mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Google Images metadata download plugin: Fix an error when searching for books with non-ASCII (e.g. non-English) characters in the title or author names. Fixes #1577036 ["KeyError" during search cover in the "Google Images Covers"](https://bugs.launchpad.net/calibre/+bug/1577036)
This commit is contained in:
parent
822c7a449c
commit
dc622318ab
@ -53,13 +53,13 @@ class GoogleImages(Source):
|
|||||||
|
|
||||||
def get_image_urls(self, title, author, log, abort, timeout):
|
def get_image_urls(self, title, author, log, abort, timeout):
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
from urllib import quote_plus
|
from urllib import urlencode
|
||||||
import html5lib
|
import html5lib
|
||||||
import json
|
import json
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
ans = OrderedDict()
|
ans = OrderedDict()
|
||||||
br = self.browser
|
br = self.browser
|
||||||
q = quote_plus('%s %s'%(title, author))
|
q = urlencode({'as_q': ('%s %s'%(title, author)).encode('utf-8')}).decode('utf-8')
|
||||||
sz = self.prefs['size']
|
sz = self.prefs['size']
|
||||||
if sz == 'any':
|
if sz == 'any':
|
||||||
sz = ''
|
sz = ''
|
||||||
@ -69,7 +69,7 @@ class GoogleImages(Source):
|
|||||||
sz = 'isz:lt,islt:%s,' % sz
|
sz = 'isz:lt,islt:%s,' % sz
|
||||||
# See https://www.google.com/advanced_image_search to understand this
|
# See https://www.google.com/advanced_image_search to understand this
|
||||||
# URL scheme
|
# URL scheme
|
||||||
url = 'https://www.google.com/search?as_st=y&tbm=isch&as_q={}&as_epq=&as_oq=&as_eq=&cr=&as_sitesearch=&safe=images&tbs={}iar:t,ift:jpg'.format(q, sz)
|
url = 'https://www.google.com/search?as_st=y&tbm=isch&{}&as_epq=&as_oq=&as_eq=&cr=&as_sitesearch=&safe=images&tbs={}iar:t,ift:jpg'.format(q, sz)
|
||||||
log('Search URL: ' + url)
|
log('Search URL: ' + url)
|
||||||
raw = br.open(url).read().decode('utf-8')
|
raw = br.open(url).read().decode('utf-8')
|
||||||
root = html5lib.parse(clean_ascii_chars(raw), treebuilder='lxml', namespaceHTMLElements=False)
|
root = html5lib.parse(clean_ascii_chars(raw), treebuilder='lxml', namespaceHTMLElements=False)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user