From 5e2fc67b981eddf8728adf22664c72cb7c808da5 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Mon, 25 Jul 2011 12:15:25 +0100
Subject: [PATCH] Amazon.co.uk and amazon.de completely their search results.

---
 .../gui2/store/stores/amazon_de_plugin.py     | 50 +++++++---------
 .../gui2/store/stores/amazon_uk_plugin.py     | 57 +++++++++++--------
 2 files changed, 53 insertions(+), 54 deletions(-)
diff --git a/src/calibre/gui2/store/stores/amazon_de_plugin.py b/src/calibre/gui2/store/stores/amazon_de_plugin.py
index 88ccbdbded..33f681ab52 100644
--- a/src/calibre/gui2/store/stores/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py
@@ -45,24 +45,26 @@ class AmazonDEKindleStore(StorePlugin):
             doc = html.fromstring(f.read())
 
             # Amazon has two results pages.
-            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            # Horizontal grid of books.
-            if is_shot:
-                data_xpath = '//div[contains(@class, "result")]'
-                format_xpath = './/div[@class="productTitle"]/text()'
-                cover_xpath = './/div[@class="productTitle"]//img/@src'
-            # Vertical list of books.
-            else:
-                data_xpath = '//div[@class="productData"]'
-                format_xpath = './/span[@class="format"]/text()'
-                cover_xpath = '../div[@class="productImage"]/a/img/@src'
+            # 20110725: seems that is_shot is gone.
+#            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
+#            # Horizontal grid of books.
+#            if is_shot:
+#                data_xpath = '//div[contains(@class, "result")]'
+#                format_xpath = './/div[@class="productTitle"]/text()'
+#                cover_xpath = './/div[@class="productTitle"]//img/@src'
+#            # Vertical list of books.
+#            else:
+            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
+            format_xpath = './/span[@class="format"]/text()'
+            cover_xpath = './/img[@class="productImage"]/@src'
+# end is_shot else
 
             for data in doc.xpath(data_xpath):
                 if counter <= 0:
                     break
 
                 # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). Se we need
+                # put in results for non Kindle books (author pages). So we need
                 # to explicitly check if the item is a Kindle book and ignore it
                 # if it isn't.
                 format = ''.join(data.xpath(format_xpath))
@@ -71,28 +73,18 @@ class AmazonDEKindleStore(StorePlugin):
 
                 # We must have an asin otherwise we can't easily reference the
                 # book later.
-                asin_href = None
-                asin_a = data.xpath('.//div[@class="productTitle"]/a[1]')
-                if asin_a:
-                    asin_href = asin_a[0].get('href', '')
-                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
-                    if m:
-                        asin = m.group('asin')
-                    else:
-                        continue
-                else:
-                    continue
+                asin = ''.join(data.xpath("@name"))
 
                 cover_url = ''.join(data.xpath(cover_xpath))
 
-                title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()'))
+                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                 price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
 
-                if is_shot:
-                    author = format.split(' von ')[-1]
-                else:
-                    author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
-                    author = author.split(' von ')[-1]
+#                if is_shot:
+#                    author = format.split(' von ')[-1]
+#                else:
+                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
+                author = author.split('von ')[-1]
 
                 counter -= 1
 
diff --git a/src/calibre/gui2/store/stores/amazon_uk_plugin.py b/src/calibre/gui2/store/stores/amazon_uk_plugin.py
index f8686d19fe..86603f3fc3 100644
--- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py
@@ -42,49 +42,56 @@ class AmazonUKKindleStore(StorePlugin):
             doc = html.fromstring(f.read())
 
             # Amazon has two results pages.
-            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            # Horizontal grid of books.
-            if is_shot:
-                data_xpath = '//div[contains(@class, "result")]'
-                cover_xpath = './/div[@class="productTitle"]//img/@src'
-            # Vertical list of books.
-            else:
-                data_xpath = '//div[contains(@class, "product")]'
-                cover_xpath = './div[@class="productImage"]/a/img/@src'
+            # 20110725: seems that is_shot is gone.
+#            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
+#            # Horizontal grid of books.
+#            if is_shot:
+#                data_xpath = '//div[contains(@class, "result")]'
+#                format_xpath = './/div[@class="productTitle"]/text()'
+#                cover_xpath = './/div[@class="productTitle"]//img/@src'
+#            # Vertical list of books.
+#            else:
+            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
+            format_xpath = './/span[@class="format"]/text()'
+            cover_xpath = './/img[@class="productImage"]/@src'
+# end is_shot else
 
             for data in doc.xpath(data_xpath):
                 if counter <= 0:
                     break
 
+                # Even though we are searching digital-text only Amazon will still
+                # put in results for non Kindle books (author pages). So we need
+                # to explicitly check if the item is a Kindle book and ignore it
+                # if it isn't.
+                format = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format.lower():
+                    continue
+
                 # We must have an asin otherwise we can't easily reference the
                 # book later.
-                asin = ''.join(data.xpath('./@name'))
-                if not asin:
-                    continue
+                asin = ''.join(data.xpath("@name"))
+
                 cover_url = ''.join(data.xpath(cover_xpath))
 
-                title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()'))
+                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                 price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
 
+#                if is_shot:
+#                    author = format.split(' von ')[-1]
+#                else:
+                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
+                author = author.split('by ')[-1]
+
                 counter -= 1
 
                 s = SearchResult()
                 s.cover_url = cover_url.strip()
                 s.title = title.strip()
+                s.author = author.strip()
                 s.price = price.strip()
                 s.detail_item = asin.strip()
-                s.formats = ''
-
-                if is_shot:
-                    # Amazon UK does not include the author on the grid layout
-                    s.author = ''
-                    self.get_details(s, timeout)
-                    if s.formats != 'Kindle':
-                        continue
-                else:
-                    author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
-                    s.author = author.split(' by ')[-1].strip()
-                    s.formats = 'Kindle'
+                s.formats = 'Kindle'
 
                 yield s