diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 61b555b041..d1c8f24da6 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -64,7 +64,7 @@ class Worker(Thread): # Get details {{{
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
- # open('/t/t.html', 'wb').write(raw)
+ #open('/t/t.html', 'wb').write(raw)
if '
404 - ' in raw:
self.log.error('URL malformed: %r'%self.url)
@@ -218,6 +218,9 @@ class Worker(Thread): # Get details {{{
' @class="emptyClear" or @href]'):
c.getparent().remove(c)
desc = tostring(desc, method='html', encoding=unicode).strip()
+ # Work around an encoding bug in Amazon's data: in some examples U+FFFD
+ # (the replacement character) is present in place of an apostrophe (')
+ desc = desc.replace('\ufffd', "'")
# remove all attributes from tags
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
# Collapse whitespace
@@ -410,6 +413,18 @@ class Amazon(Source):
if 'bulk pack' not in title:
matches.append(a.get('href'))
break
+ if not matches:
+ # Finding no matches above can happen for some user agents that Amazon
+ # thinks are mobile/less capable
+ log('Trying alternate results page markup')
+ for td in root.xpath(
+ r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
+ for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
+ title = tostring(a, method='text', encoding=unicode).lower()
+ if 'bulk pack' not in title:
+ matches.append(a.get('href'))
+ break
+
# Keep only the top 5 matches as the matches are sorted by relevance by
# Amazon so lower matches are not likely to be very relevant