Resolve cover URLs early

This commit is contained in:
Kovid Goyal 2017-03-02 20:04:53 +05:30
parent 959583bcc2
commit f98dfa6775

View File

@@ -1220,7 +1220,7 @@ class Amazon(Source):
domain)[len('https://'):].partition('/')[0] domain)[len('https://'):].partition('/')[0]
matches = [] matches = []
se = search_engines_module() se = search_engines_module()
cover_url_prefix = 'bing' urlproc = se.bing_url_processor
for result in se.bing_search(terms, site, log=log, br=br, timeout=timeout): for result in se.bing_search(terms, site, log=log, br=br, timeout=timeout):
if abort.is_set(): if abort.is_set():
return matches, terms, domain, None return matches, terms, domain, None
@@ -1242,7 +1242,7 @@ class Amazon(Source):
log('Skipping non-book result:', result) log('Skipping non-book result:', result)
if not matches: if not matches:
log('No search engine results for terms:', ' '.join(terms)) log('No search engine results for terms:', ' '.join(terms))
return matches, terms, domain, lambda x: (cover_url_prefix + ':' + x) return matches, terms, domain, urlproc
# }}} # }}}
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
@@ -1355,15 +1355,13 @@ class Amazon(Source):
return return
log('Downloading cover from:', cached_url) log('Downloading cover from:', cached_url)
br = self.browser br = self.browser
se = search_engines_module()
url = se.resolve_url(cached_url)
if USE_SEARCH_ENGINE: if USE_SEARCH_ENGINE:
br = br.clone_browser() br = br.clone_browser()
br.set_current_header('Referer', self.referrer_for_domain(self.domain)) br.set_current_header('Referer', self.referrer_for_domain(self.domain))
try: try:
time.sleep(1) time.sleep(1)
cdata = br.open_novisit( cdata = br.open_novisit(
url, timeout=timeout).read() cached_url, timeout=timeout).read()
result_queue.put((self, cdata)) result_queue.put((self, cdata))
except: except:
log.exception('Failed to download cover from:', cached_url) log.exception('Failed to download cover from:', cached_url)