From 19c8784ad92ec6778c9d8a9398df3034752468e8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 5 Jun 2016 11:20:44 +0530
Subject: [PATCH] Use a single user agent string for amazon

Amazon returns CAPTCHA pages based on user agent sniffing, so use a
common user agent (IE 11)
---
 src/calibre/ebooks/metadata/sources/amazon.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 294ad92035..f50deeec85 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -12,13 +12,16 @@ from threading import Thread
 from Queue import Queue, Empty
 
 
-from calibre import as_unicode, random_user_agent
+from calibre import as_unicode
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
         fixauthors)
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.localization import canonicalize_lang
 
+class CaptchaError(Exception):
+    pass
+
 def parse_details_page(url, log, timeout, browser, domain):
     from calibre.utils.cleantext import clean_ascii_chars
     from calibre.ebooks.chardet import xml_to_unicode
@@ -299,6 +302,8 @@ class Worker(Thread):  # Get details {{{
 
     def parse_details(self, raw, root):
         asin = parse_asin(root, self.log, self.url)
+        if not asin and root.xpath('//form[@action="/errors/validateCaptcha"]'):
+            raise CaptchaError('Amazon returned a CAPTCHA page, probably because you downloaded too many books. Wait for some time and try again.')
         if self.testing:
             import tempfile, uuid
             with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4()))+ '_',
@@ -764,9 +769,7 @@ class Amazon(Source):
 
     @property
     def user_agent(self):
-        # Pass in an index to random_user_agent() to test with a particular
-        # user agent
-        return random_user_agent()
+        return 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0;  rv:11.0) like Gecko'
 
     def save_settings(self, *args, **kwargs):
         Source.save_settings(self, *args, **kwargs)
@@ -985,6 +988,8 @@ class Amazon(Source):
                             url = 'http://www.amazon.%s%s' % (self.get_website_domain(domain), url)
                         matches.append(url)
                     break
+        if not matches and root.xpath('//form[@action="/errors/validateCaptcha"]'):
+            raise CaptchaError('Amazon returned a CAPTCHA page, probably because you downloaded too many books. Wait for some time and try again.')
 
         # Keep only the top 5 matches as the matches are sorted by relevance by
         # Amazon so lower matches are not likely to be very relevant