mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Small speedup for Amazon metadata download by avoiding the redirect from HTTP to HTTPS.
This commit is contained in:
parent
b003cd6f81
commit
6333859f09
@ -62,7 +62,7 @@ def parse_details_page(url, log, timeout, browser, domain):
|
|||||||
if domain == 'jp':
|
if domain == 'jp':
|
||||||
for a in root.xpath('//a[@href]'):
|
for a in root.xpath('//a[@href]'):
|
||||||
if 'black-curtain-redirect.html' in a.get('href'):
|
if 'black-curtain-redirect.html' in a.get('href'):
|
||||||
url = 'http://amazon.co.jp'+a.get('href')
|
url = 'https://amazon.co.jp'+a.get('href')
|
||||||
log('Black curtain redirect found, following')
|
log('Black curtain redirect found, following')
|
||||||
return parse_details_page(url, log, timeout, browser, domain)
|
return parse_details_page(url, log, timeout, browser, domain)
|
||||||
|
|
||||||
@ -669,7 +669,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
if 'data:' in src:
|
if 'data:' in src:
|
||||||
continue
|
continue
|
||||||
if 'loading-' in src:
|
if 'loading-' in src:
|
||||||
js_img = re.search(br'"largeImage":"(http://[^"]+)",',raw)
|
js_img = re.search(br'"largeImage":"(https?://[^"]+)",',raw)
|
||||||
if js_img:
|
if js_img:
|
||||||
src = js_img.group(1).decode('utf-8')
|
src = js_img.group(1).decode('utf-8')
|
||||||
if ('/no-image-avail' not in src and 'loading-' not in src and '/no-img-sm' not in src):
|
if ('/no-image-avail' not in src and 'loading-' not in src and '/no-img-sm' not in src):
|
||||||
@ -832,13 +832,13 @@ class Amazon(Source):
|
|||||||
if domain and asin:
|
if domain and asin:
|
||||||
url = None
|
url = None
|
||||||
if domain == 'com':
|
if domain == 'com':
|
||||||
url = 'http://amzn.com/'+asin
|
url = 'https://amzn.com/'+asin
|
||||||
elif domain == 'uk':
|
elif domain == 'uk':
|
||||||
url = 'http://www.amazon.co.uk/dp/'+asin
|
url = 'https://www.amazon.co.uk/dp/'+asin
|
||||||
elif domain == 'br':
|
elif domain == 'br':
|
||||||
url = 'http://www.amazon.com.br/dp/'+asin
|
url = 'https://www.amazon.com.br/dp/'+asin
|
||||||
else:
|
else:
|
||||||
url = 'http://www.amazon.%s/dp/%s'%(domain, asin)
|
url = 'https://www.amazon.%s/dp/%s'%(domain, asin)
|
||||||
if url:
|
if url:
|
||||||
idtype = 'amazon' if domain == 'com' else 'amazon_'+domain
|
idtype = 'amazon' if domain == 'com' else 'amazon_'+domain
|
||||||
return domain, idtype, asin, url
|
return domain, idtype, asin, url
|
||||||
@ -964,7 +964,7 @@ class Amazon(Source):
|
|||||||
encoded_q = dict([(x.encode(encode_to, 'ignore'), y.encode(encode_to,
|
encoded_q = dict([(x.encode(encode_to, 'ignore'), y.encode(encode_to,
|
||||||
'ignore')) for x, y in
|
'ignore')) for x, y in
|
||||||
q.iteritems()])
|
q.iteritems()])
|
||||||
url = 'http://www.amazon.%s/s/?'%self.get_website_domain(domain) + urlencode(encoded_q)
|
url = 'https://www.amazon.%s/s/?'%self.get_website_domain(domain) + urlencode(encoded_q)
|
||||||
return url, domain
|
return url, domain
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
@ -1005,7 +1005,7 @@ class Amazon(Source):
|
|||||||
if title_ok(title):
|
if title_ok(title):
|
||||||
url = a.get('href')
|
url = a.get('href')
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.amazon.%s%s' % (self.get_website_domain(domain), url)
|
url = 'https://www.amazon.%s%s' % (self.get_website_domain(domain), url)
|
||||||
matches.append(url)
|
matches.append(url)
|
||||||
|
|
||||||
if not matches:
|
if not matches:
|
||||||
@ -1020,7 +1020,7 @@ class Amazon(Source):
|
|||||||
if title_ok(title):
|
if title_ok(title):
|
||||||
url = a.get('href')
|
url = a.get('href')
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.amazon.%s%s' % (self.get_website_domain(domain), url)
|
url = 'https://www.amazon.%s%s' % (self.get_website_domain(domain), url)
|
||||||
matches.append(url)
|
matches.append(url)
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -1034,7 +1034,7 @@ class Amazon(Source):
|
|||||||
if title_ok(title):
|
if title_ok(title):
|
||||||
url = a.get('href')
|
url = a.get('href')
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.amazon.%s%s' % (self.get_website_domain(domain), url)
|
url = 'https://www.amazon.%s%s' % (self.get_website_domain(domain), url)
|
||||||
matches.append(url)
|
matches.append(url)
|
||||||
break
|
break
|
||||||
if not matches and root.xpath('//form[@action="/errors/validateCaptcha"]'):
|
if not matches and root.xpath('//form[@action="/errors/validateCaptcha"]'):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user