From ae1d2874d87f91237febed395a14833bdbc6d66b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 21 Sep 2013 08:15:07 +0530 Subject: [PATCH] Handle yet another amazon website change --- src/calibre/ebooks/metadata/sources/amazon.py | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 44241f5c10..4c8990ff66 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -748,6 +748,16 @@ class Amazon(Source): mi.tags = list(map(fixcase, mi.tags)) mi.isbn = check_isbn(mi.isbn) + def get_website_domain(self, domain): + udomain = domain + if domain == 'uk': + udomain = 'co.uk' + elif domain == 'jp': + udomain = 'co.jp' + elif domain == 'br': + udomain = 'com.br' + return udomain + def create_query(self, log, title=None, authors=None, identifiers={}, # {{{ domain=None): from urllib import urlencode @@ -803,14 +813,7 @@ class Amazon(Source): encoded_q = dict([(x.encode(encode_to, 'ignore'), y.encode(encode_to, 'ignore')) for x, y in q.iteritems()]) - udomain = domain - if domain == 'uk': - udomain = 'co.uk' - elif domain == 'jp': - udomain = 'co.jp' - elif domain == 'br': - udomain = 'com.br' - url = 'http://www.amazon.%s/s/?'%udomain + urlencode(encoded_q) + url = 'http://www.amazon.%s/s/?'%self.get_website_domain(domain) + urlencode(encoded_q) return url, domain # }}} @@ -828,7 +831,7 @@ class Amazon(Source): return url # }}} - def parse_results_page(self, root): # {{{ + def parse_results_page(self, root, domain): # {{{ from lxml.html import tostring matches = [] @@ -851,7 +854,10 @@ class Amazon(Source): for a in links: title = tostring(a, method='text', encoding=unicode) if title_ok(title): - matches.append(a.get('href')) + url = a.get('href') + if url.startswith('/'): + url = 'http://www.amazon.%s%s' % (self.get_website_domain(domain), url) + matches.append(url) break if not matches: @@ -862,7 +868,10 @@ class Amazon(Source): for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'): title = tostring(a, method='text', encoding=unicode) if title_ok(title): - matches.append(a.get('href')) + url = a.get('href') + if url.startswith('/'): + url = 'http://www.amazon.%s%s' % (self.get_website_domain(domain), url) + matches.append(url) break # Keep only the top 5 matches as the matches are sorted by relevance by @@ -938,7 +947,7 @@ class Amazon(Source): found = False if found: - matches = self.parse_results_page(root) + matches = self.parse_results_page(root, domain) if abort.is_set(): return