Handle yet another amazon website change
commit ae1d2874d8 (parent 3d3cb1fdb9)
@@ -748,6 +748,16 @@ class Amazon(Source):
         mi.tags = list(map(fixcase, mi.tags))
         mi.isbn = check_isbn(mi.isbn)
 
+    def get_website_domain(self, domain):
+        udomain = domain
+        if domain == 'uk':
+            udomain = 'co.uk'
+        elif domain == 'jp':
+            udomain = 'co.jp'
+        elif domain == 'br':
+            udomain = 'com.br'
+        return udomain
+
     def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
             domain=None):
         from urllib import urlencode
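The new get_website_domain() helper centralises the mapping from calibre's
two-letter Amazon store codes to the matching website TLD. A minimal
standalone sketch of the same mapping, using a dict form purely for
illustration (the plugin itself uses the if/elif chain above, and the sample
codes below are just examples):

def get_website_domain(domain):
    # Store codes whose website TLD differs from the code itself.
    special = {'uk': 'co.uk', 'jp': 'co.jp', 'br': 'com.br'}
    return special.get(domain, domain)

assert get_website_domain('uk') == 'co.uk'
assert get_website_domain('br') == 'com.br'
assert get_website_domain('de') == 'de'  # everything else passes through unchanged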
@@ -803,14 +813,7 @@ class Amazon(Source):
         encoded_q = dict([(x.encode(encode_to, 'ignore'), y.encode(encode_to,
             'ignore')) for x, y in
             q.iteritems()])
-        udomain = domain
-        if domain == 'uk':
-            udomain = 'co.uk'
-        elif domain == 'jp':
-            udomain = 'co.jp'
-        elif domain == 'br':
-            udomain = 'com.br'
-        url = 'http://www.amazon.%s/s/?'%udomain + urlencode(encoded_q)
+        url = 'http://www.amazon.%s/s/?'%self.get_website_domain(domain) + urlencode(encoded_q)
         return url, domain
 
     # }}}
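With the helper in place, create_query() builds the search URL from the
returned TLD instead of repeating the mapping inline. A hedged sketch of the
resulting URL shape, using Python 3's urllib.parse.urlencode in place of the
Python 2 urllib.urlencode imported above, with a made-up query:

from urllib.parse import urlencode

def build_search_url(domain, query):
    # Same effect as self.get_website_domain(domain) in the diff above.
    special = {'uk': 'co.uk', 'jp': 'co.jp', 'br': 'com.br'}
    return 'http://www.amazon.%s/s/?' % special.get(domain, domain) + urlencode(query)

print(build_search_url('uk', {'field-keywords': 'dune frank herbert'}))
# http://www.amazon.co.uk/s/?field-keywords=dune+frank+herbert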
@@ -828,7 +831,7 @@ class Amazon(Source):
         return url
     # }}}
 
-    def parse_results_page(self, root): # {{{
+    def parse_results_page(self, root, domain): # {{{
         from lxml.html import tostring
 
         matches = []
@@ -851,7 +854,10 @@ class Amazon(Source):
             for a in links:
                 title = tostring(a, method='text', encoding=unicode)
                 if title_ok(title):
-                    matches.append(a.get('href'))
+                    url = a.get('href')
+                    if url.startswith('/'):
+                        url = 'http://www.amazon.%s%s' % (self.get_website_domain(domain), url)
+                    matches.append(url)
                     break
 
         if not matches:
@@ -862,7 +868,10 @@ class Amazon(Source):
             for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
                 title = tostring(a, method='text', encoding=unicode)
                 if title_ok(title):
-                    matches.append(a.get('href'))
+                    url = a.get('href')
+                    if url.startswith('/'):
+                        url = 'http://www.amazon.%s%s' % (self.get_website_domain(domain), url)
+                    matches.append(url)
                     break
 
         # Keep only the top 5 matches as the matches are sorted by relevance by
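Both result-parsing branches now handle the website change that prompted this
commit: result pages can return hrefs relative to the site root, so relative
links are made absolute against the store's domain before being collected.
A self-contained sketch of that logic, with made-up hrefs and ASINs:

def absolutize(href, tld):
    # Mirror of the new check in parse_results_page(): only root-relative
    # links need the scheme and host prepended.
    if href.startswith('/'):
        return 'http://www.amazon.%s%s' % (tld, href)
    return href

hrefs = ['/Some-Book/dp/0000000000', 'http://www.amazon.co.uk/dp/1111111111']
matches = [absolutize(h, 'co.uk') for h in hrefs]
# ['http://www.amazon.co.uk/Some-Book/dp/0000000000',
#  'http://www.amazon.co.uk/dp/1111111111']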
@@ -938,7 +947,7 @@ class Amazon(Source):
                 found = False
 
         if found:
-            matches = self.parse_results_page(root)
+            matches = self.parse_results_page(root, domain)
 
         if abort.is_set():
            return