Amazon metadata plugin: Fix metadata for adult books not being downloaded, because Google does not cache those pages.

This commit is contained in:
Kovid Goyal 2018-04-15 07:57:44 +05:30
parent 449191df19
commit 6eb67b9b80
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -839,7 +839,7 @@ class Worker(Thread): # Get details {{{
class Amazon(Source): class Amazon(Source):
name = 'Amazon.com' name = 'Amazon.com'
version = (1, 2, 1) version = (1, 2, 2)
minimum_calibre_version = (2, 82, 0) minimum_calibre_version = (2, 82, 0)
description = _('Downloads metadata and covers from Amazon') description = _('Downloads metadata and covers from Amazon')
@ -1264,7 +1264,7 @@ class Amazon(Source):
return matches, query, domain, None return matches, query, domain, None
# }}} # }}}
def search_search_engine(self, br, testing, log, abort, title, authors, identifiers, timeout): # {{{ def search_search_engine(self, br, testing, log, abort, title, authors, identifiers, timeout, override_server=None): # {{{
from calibre.ebooks.metadata.sources.update import search_engines_module from calibre.ebooks.metadata.sources.update import search_engines_module
terms, domain = self.create_query(log, title=title, authors=authors, terms, domain = self.create_query(log, title=title, authors=authors,
identifiers=identifiers, for_amazon=False) identifiers=identifiers, for_amazon=False)
@ -1272,7 +1272,7 @@ class Amazon(Source):
domain)[len('https://'):].partition('/')[0] domain)[len('https://'):].partition('/')[0]
matches = [] matches = []
se = search_engines_module() se = search_engines_module()
server = self.server server = override_server or self.server
if server in ('bing',): if server in ('bing',):
urlproc, sfunc = se.bing_url_processor, se.bing_search urlproc, sfunc = se.bing_url_processor, se.bing_search
elif server in ('auto', 'google'): elif server in ('auto', 'google'):
@ -1302,6 +1302,10 @@ class Amazon(Source):
log('Skipping non-book result:', result) log('Skipping non-book result:', result)
if not matches: if not matches:
log('No search engine results for terms:', ' '.join(terms)) log('No search engine results for terms:', ' '.join(terms))
if urlproc is se.google_url_processor:
# Google does not cache adult titles
log('Trying the bing search engine instead')
return self.search_search_engine(br, testing, log, abort, title, authors, identifiers, timeout, 'bing')
return matches, terms, domain, urlproc return matches, terms, domain, urlproc
# }}} # }}}