Make the Amazon EU store plugins more robust against Amazon reporting zero books found.

2025-11-27 00:35:00 -05:00 · 2013-04-01 10:44:35 +02:00 · 2013-04-01 10:44:35 +02:00 · 7169ffed0a
commit 7169ffed0a
parent 40dbdc75ae
5 changed files with 290 additions and 230 deletions
--- a/src/calibre/gui2/store/stores/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py
@ -7,7 +7,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import re
+import re, time
 from contextlib import closing
 from lxml import html
@ -29,6 +29,9 @@ class AmazonEUBase(StorePlugin):
    For comments on the implementation, please see amazon_plugin.py
    '''
    MAX_SEARCH_ATTEMPTS = 5
    SLEEP_BETWEEN_ATTEMPTS = 3
    def open(self, parent=None, detail_item=None, external=False):
        store_link = self.store_link % self.aff_id
@ -42,62 +45,71 @@ class AmazonEUBase(StorePlugin):
        br = browser()
        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
+        loops = 0
-            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
+        while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
            br = browser()
            if loops > 0:
                print ("Retry getbooks search", self.__class__.__name__, counter,
                       max_results, loops)
                time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
            loops += 1
-            data_xpath = '//div[contains(@class, "prod")]'
+            with closing(br.open(url, timeout=timeout)) as f:
-            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+                doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
            asin_xpath = '@name'
            cover_xpath = './/img[@class="productImage"]/@src'
            title_xpath = './/h3[@class="newaps"]/a//text()'
            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-            for data in doc.xpath(data_xpath):
+                data_xpath = '//div[contains(@class, "prod")]'
-                if counter <= 0:
+                format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
-                    break
+                asin_xpath = '@name'
                cover_xpath = './/img[@class="productImage"]/@src'
                title_xpath = './/h3[@class="newaps"]/a//text()'
                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
                price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-                # Even though we are searching digital-text only Amazon will still
+                for data in doc.xpath(data_xpath):
-                # put in results for non Kindle books (authors pages). Se we need
+                    if counter <= 0:
-                # to explicitly check if the item is a Kindle book and ignore it
+                        break
                # if it isn't.
                format_ = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format_.lower():
                    continue
-                # We must have an asin otherwise we can't easily reference the
+                    # Even though we are searching digital-text only Amazon will still
-                # book later.
+                    # put in results for non Kindle books (authors pages). Se we need
-                asin = data.xpath(asin_xpath)
+                    # to explicitly check if the item is a Kindle book and ignore it
-                if asin:
+                    # if it isn't.
-                    asin = asin[0]
+                    format_ = ''.join(data.xpath(format_xpath))
-                else:
+                    if 'kindle' not in format_.lower():
-                    continue
+                        continue
-                cover_url = ''.join(data.xpath(cover_xpath))
+                    # We must have an asin otherwise we can't easily reference the
                    # book later.
                    asin = data.xpath(asin_xpath)
                    if asin:
                        asin = asin[0]
                    else:
                        continue
-                title = ''.join(data.xpath(title_xpath))
+                    cover_url = ''.join(data.xpath(cover_xpath))
-                authors = ''.join(data.xpath(author_xpath))
+                    title = ''.join(data.xpath(title_xpath))
                authors = re.sub('^' + self.author_article, '', authors)
                authors = re.sub(self.and_word, ' & ', authors)
                mo = re.match(r'(.*)(\(\d.*)$', authors)
                if mo:
                    authors = mo.group(1).strip()
-                price = ''.join(data.xpath(price_xpath))
+                    authors = ''.join(data.xpath(author_xpath))
                    authors = re.sub('^' + self.author_article, '', authors)
                    authors = re.sub(self.and_word, ' & ', authors)
                    mo = re.match(r'(.*)(\(\d.*)$', authors)
                    if mo:
                        authors = mo.group(1).strip()
-                counter -= 1
+                    price = ''.join(data.xpath(price_xpath))
-                s = SearchResult()
+                    counter -= 1
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = authors.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Kindle'
-                yield s
+                    s = SearchResult()
                    s.cover_url = cover_url.strip()
                    s.title = title.strip()
                    s.author = authors.strip()
                    s.price = price.strip()
                    s.detail_item = asin.strip()
                    s.drm = SearchResult.DRM_UNKNOWN
                    s.formats = 'Kindle'
                    yield s
    def get_details(self, search_result, timeout):
        '''
        Placeholder that performs no work.

        The body is empty: neither ``search_result`` nor ``timeout`` is read,
        nothing is modified, and ``None`` is returned implicitly.
        '''
        pass
--- a/src/calibre/gui2/store/stores/amazon_es_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py
@ -7,7 +7,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import re
+import re, time
 from contextlib import closing
 from lxml import html
@ -28,6 +28,9 @@ class AmazonEUBase(StorePlugin):
    For comments on the implementation, please see amazon_plugin.py
    '''
    MAX_SEARCH_ATTEMPTS = 5
    SLEEP_BETWEEN_ATTEMPTS = 3
    def open(self, parent=None, detail_item=None, external=False):
        store_link = self.store_link % self.aff_id
@ -41,62 +44,71 @@ class AmazonEUBase(StorePlugin):
        br = browser()
        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
+        loops = 0
-            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
+        while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
            br = browser()
            if loops > 0:
                print ("Retry getbooks search", self.__class__.__name__, counter,
                       max_results, loops)
                time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
            loops += 1
-            data_xpath = '//div[contains(@class, "prod")]'
+            with closing(br.open(url, timeout=timeout)) as f:
-            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+                doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
            asin_xpath = '@name'
            cover_xpath = './/img[@class="productImage"]/@src'
            title_xpath = './/h3[@class="newaps"]/a//text()'
            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-            for data in doc.xpath(data_xpath):
+                data_xpath = '//div[contains(@class, "prod")]'
-                if counter <= 0:
+                format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
-                    break
+                asin_xpath = '@name'
                cover_xpath = './/img[@class="productImage"]/@src'
                title_xpath = './/h3[@class="newaps"]/a//text()'
                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
                price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-                # Even though we are searching digital-text only Amazon will still
+                for data in doc.xpath(data_xpath):
-                # put in results for non Kindle books (authors pages). Se we need
+                    if counter <= 0:
-                # to explicitly check if the item is a Kindle book and ignore it
+                        break
                # if it isn't.
                format_ = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format_.lower():
                    continue
-                # We must have an asin otherwise we can't easily reference the
+                    # Even though we are searching digital-text only Amazon will still
-                # book later.
+                    # put in results for non Kindle books (authors pages). Se we need
-                asin = data.xpath(asin_xpath)
+                    # to explicitly check if the item is a Kindle book and ignore it
-                if asin:
+                    # if it isn't.
-                    asin = asin[0]
+                    format_ = ''.join(data.xpath(format_xpath))
-                else:
+                    if 'kindle' not in format_.lower():
-                    continue
+                        continue
-                cover_url = ''.join(data.xpath(cover_xpath))
+                    # We must have an asin otherwise we can't easily reference the
                    # book later.
                    asin = data.xpath(asin_xpath)
                    if asin:
                        asin = asin[0]
                    else:
                        continue
-                title = ''.join(data.xpath(title_xpath))
+                    cover_url = ''.join(data.xpath(cover_xpath))
-                authors = ''.join(data.xpath(author_xpath))
+                    title = ''.join(data.xpath(title_xpath))
                authors = re.sub('^' + self.author_article, '', authors)
                authors = re.sub(self.and_word, ' & ', authors)
                mo = re.match(r'(.*)(\(\d.*)$', authors)
                if mo:
                    authors = mo.group(1).strip()
-                price = ''.join(data.xpath(price_xpath))
+                    authors = ''.join(data.xpath(author_xpath))
                    authors = re.sub('^' + self.author_article, '', authors)
                    authors = re.sub(self.and_word, ' & ', authors)
                    mo = re.match(r'(.*)(\(\d.*)$', authors)
                    if mo:
                        authors = mo.group(1).strip()
-                counter -= 1
+                    price = ''.join(data.xpath(price_xpath))
-                s = SearchResult()
+                    counter -= 1
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = authors.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Kindle'
-                yield s
+                    s = SearchResult()
                    s.cover_url = cover_url.strip()
                    s.title = title.strip()
                    s.author = authors.strip()
                    s.price = price.strip()
                    s.detail_item = asin.strip()
                    s.drm = SearchResult.DRM_UNKNOWN
                    s.formats = 'Kindle'
                    yield s
    def get_details(self, search_result, timeout):
        '''
        Placeholder that performs no work.

        The body is empty: neither ``search_result`` nor ``timeout`` is read,
        nothing is modified, and ``None`` is returned implicitly.
        '''
        pass
--- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py
@ -7,7 +7,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import re
+import re, time
 from contextlib import closing
 from lxml import html
@ -29,6 +29,9 @@ class AmazonEUBase(StorePlugin):
    For comments on the implementation, please see amazon_plugin.py
    '''
    MAX_SEARCH_ATTEMPTS = 5
    SLEEP_BETWEEN_ATTEMPTS = 3
    def open(self, parent=None, detail_item=None, external=False):
        store_link = self.store_link % self.aff_id
@ -42,62 +45,71 @@ class AmazonEUBase(StorePlugin):
        br = browser()
        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
+        loops = 0
-            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
+        while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
            br = browser()
            if loops > 0:
                print ("Retry getbooks search", self.__class__.__name__, counter,
                       max_results, loops)
                time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
            loops += 1
-            data_xpath = '//div[contains(@class, "prod")]'
+            with closing(br.open(url, timeout=timeout)) as f:
-            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+                doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
            asin_xpath = '@name'
            cover_xpath = './/img[@class="productImage"]/@src'
            title_xpath = './/h3[@class="newaps"]/a//text()'
            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-            for data in doc.xpath(data_xpath):
+                data_xpath = '//div[contains(@class, "prod")]'
-                if counter <= 0:
+                format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
-                    break
+                asin_xpath = '@name'
                cover_xpath = './/img[@class="productImage"]/@src'
                title_xpath = './/h3[@class="newaps"]/a//text()'
                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
                price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-                # Even though we are searching digital-text only Amazon will still
+                for data in doc.xpath(data_xpath):
-                # put in results for non Kindle books (authors pages). Se we need
+                    if counter <= 0:
-                # to explicitly check if the item is a Kindle book and ignore it
+                        break
                # if it isn't.
                format_ = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format_.lower():
                    continue
-                # We must have an asin otherwise we can't easily reference the
+                    # Even though we are searching digital-text only Amazon will still
-                # book later.
+                    # put in results for non Kindle books (authors pages). Se we need
-                asin = data.xpath(asin_xpath)
+                    # to explicitly check if the item is a Kindle book and ignore it
-                if asin:
+                    # if it isn't.
-                    asin = asin[0]
+                    format_ = ''.join(data.xpath(format_xpath))
-                else:
+                    if 'kindle' not in format_.lower():
-                    continue
+                        continue
-                cover_url = ''.join(data.xpath(cover_xpath))
+                    # We must have an asin otherwise we can't easily reference the
                    # book later.
                    asin = data.xpath(asin_xpath)
                    if asin:
                        asin = asin[0]
                    else:
                        continue
-                title = ''.join(data.xpath(title_xpath))
+                    cover_url = ''.join(data.xpath(cover_xpath))
-                authors = ''.join(data.xpath(author_xpath))
+                    title = ''.join(data.xpath(title_xpath))
                authors = re.sub('^' + self.author_article, '', authors)
                authors = re.sub(self.and_word, ' & ', authors)
                mo = re.match(r'(.*)(\(\d.*)$', authors)
                if mo:
                    authors = mo.group(1).strip()
-                price = ''.join(data.xpath(price_xpath))
+                    authors = ''.join(data.xpath(author_xpath))
                    authors = re.sub('^' + self.author_article, '', authors)
                    authors = re.sub(self.and_word, ' & ', authors)
                    mo = re.match(r'(.*)(\(\d.*)$', authors)
                    if mo:
                        authors = mo.group(1).strip()
-                counter -= 1
+                    price = ''.join(data.xpath(price_xpath))
-                s = SearchResult()
+                    counter -= 1
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = authors.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Kindle'
-                yield s
+                    s = SearchResult()
                    s.cover_url = cover_url.strip()
                    s.title = title.strip()
                    s.author = authors.strip()
                    s.price = price.strip()
                    s.detail_item = asin.strip()
                    s.drm = SearchResult.DRM_UNKNOWN
                    s.formats = 'Kindle'
                    yield s
    def get_details(self, search_result, timeout):
        '''
        Placeholder that performs no work.

        The body is empty: neither ``search_result`` nor ``timeout`` is read,
        nothing is modified, and ``None`` is returned implicitly.
        '''
        pass
--- a/src/calibre/gui2/store/stores/amazon_it_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py
@ -7,7 +7,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import re
+import re, time
 from contextlib import closing
 from lxml import html
@ -28,6 +28,9 @@ class AmazonEUBase(StorePlugin):
    For comments on the implementation, please see amazon_plugin.py
    '''
    MAX_SEARCH_ATTEMPTS = 5
    SLEEP_BETWEEN_ATTEMPTS = 3
    def open(self, parent=None, detail_item=None, external=False):
        store_link = self.store_link % self.aff_id
@ -41,62 +44,71 @@ class AmazonEUBase(StorePlugin):
        br = browser()
        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
+        loops = 0
-            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
+        while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
            br = browser()
            if loops > 0:
                print ("Retry getbooks search", self.__class__.__name__, counter,
                       max_results, loops)
                time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
            loops += 1
-            data_xpath = '//div[contains(@class, "prod")]'
+            with closing(br.open(url, timeout=timeout)) as f:
-            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+                doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
            asin_xpath = '@name'
            cover_xpath = './/img[@class="productImage"]/@src'
            title_xpath = './/h3[@class="newaps"]/a//text()'
            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-            for data in doc.xpath(data_xpath):
+                data_xpath = '//div[contains(@class, "prod")]'
-                if counter <= 0:
+                format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
-                    break
+                asin_xpath = '@name'
                cover_xpath = './/img[@class="productImage"]/@src'
                title_xpath = './/h3[@class="newaps"]/a//text()'
                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
                price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-                # Even though we are searching digital-text only Amazon will still
+                for data in doc.xpath(data_xpath):
-                # put in results for non Kindle books (authors pages). Se we need
+                    if counter <= 0:
-                # to explicitly check if the item is a Kindle book and ignore it
+                        break
                # if it isn't.
                format_ = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format_.lower():
                    continue
-                # We must have an asin otherwise we can't easily reference the
+                    # Even though we are searching digital-text only Amazon will still
-                # book later.
+                    # put in results for non Kindle books (authors pages). Se we need
-                asin = data.xpath(asin_xpath)
+                    # to explicitly check if the item is a Kindle book and ignore it
-                if asin:
+                    # if it isn't.
-                    asin = asin[0]
+                    format_ = ''.join(data.xpath(format_xpath))
-                else:
+                    if 'kindle' not in format_.lower():
-                    continue
+                        continue
-                cover_url = ''.join(data.xpath(cover_xpath))
+                    # We must have an asin otherwise we can't easily reference the
                    # book later.
                    asin = data.xpath(asin_xpath)
                    if asin:
                        asin = asin[0]
                    else:
                        continue
-                title = ''.join(data.xpath(title_xpath))
+                    cover_url = ''.join(data.xpath(cover_xpath))
-                authors = ''.join(data.xpath(author_xpath))
+                    title = ''.join(data.xpath(title_xpath))
                authors = re.sub('^' + self.author_article, '', authors)
                authors = re.sub(self.and_word, ' & ', authors)
                mo = re.match(r'(.*)(\(\d.*)$', authors)
                if mo:
                    authors = mo.group(1).strip()
-                price = ''.join(data.xpath(price_xpath))
+                    authors = ''.join(data.xpath(author_xpath))
                    authors = re.sub('^' + self.author_article, '', authors)
                    authors = re.sub(self.and_word, ' & ', authors)
                    mo = re.match(r'(.*)(\(\d.*)$', authors)
                    if mo:
                        authors = mo.group(1).strip()
-                counter -= 1
+                    price = ''.join(data.xpath(price_xpath))
-                s = SearchResult()
+                    counter -= 1
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = authors.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Kindle'
-                yield s
+                    s = SearchResult()
                    s.cover_url = cover_url.strip()
                    s.title = title.strip()
                    s.author = authors.strip()
                    s.price = price.strip()
                    s.detail_item = asin.strip()
                    s.drm = SearchResult.DRM_UNKNOWN
                    s.formats = 'Kindle'
                    yield s
    def get_details(self, search_result, timeout):
        '''
        Placeholder that performs no work.

        The body is empty: neither ``search_result`` nor ``timeout`` is read,
        nothing is modified, and ``None`` is returned implicitly.
        '''
        pass
--- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py
@ -7,7 +7,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import re
+import re, time
 from contextlib import closing
 from lxml import html
@ -28,6 +28,9 @@ class AmazonEUBase(StorePlugin):
    For comments on the implementation, please see amazon_plugin.py
    '''
    MAX_SEARCH_ATTEMPTS = 5
    SLEEP_BETWEEN_ATTEMPTS = 3
    def open(self, parent=None, detail_item=None, external=False):
        store_link = self.store_link % self.aff_id
@ -41,62 +44,71 @@ class AmazonEUBase(StorePlugin):
        br = browser()
        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
+        loops = 0
-            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
+        while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
            br = browser()
            if loops > 0:
                print ("Retry getbooks search", self.__class__.__name__, counter,
                       max_results, loops)
                time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
            loops += 1
-            data_xpath = '//div[contains(@class, "prod")]'
+            with closing(br.open(url, timeout=timeout)) as f:
-            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+                doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
            asin_xpath = '@name'
            cover_xpath = './/img[@class="productImage"]/@src'
            title_xpath = './/h3[@class="newaps"]/a//text()'
            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-            for data in doc.xpath(data_xpath):
+                data_xpath = '//div[contains(@class, "prod")]'
-                if counter <= 0:
+                format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
-                    break
+                asin_xpath = '@name'
                cover_xpath = './/img[@class="productImage"]/@src'
                title_xpath = './/h3[@class="newaps"]/a//text()'
                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
                price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
-                # Even though we are searching digital-text only Amazon will still
+                for data in doc.xpath(data_xpath):
-                # put in results for non Kindle books (authors pages). Se we need
+                    if counter <= 0:
-                # to explicitly check if the item is a Kindle book and ignore it
+                        break
                # if it isn't.
                format_ = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format_.lower():
                    continue
-                # We must have an asin otherwise we can't easily reference the
+                    # Even though we are searching digital-text only Amazon will still
-                # book later.
+                    # put in results for non Kindle books (authors pages). Se we need
-                asin = data.xpath(asin_xpath)
+                    # to explicitly check if the item is a Kindle book and ignore it
-                if asin:
+                    # if it isn't.
-                    asin = asin[0]
+                    format_ = ''.join(data.xpath(format_xpath))
-                else:
+                    if 'kindle' not in format_.lower():
-                    continue
+                        continue
-                cover_url = ''.join(data.xpath(cover_xpath))
+                    # We must have an asin otherwise we can't easily reference the
                    # book later.
                    asin = data.xpath(asin_xpath)
                    if asin:
                        asin = asin[0]
                    else:
                        continue
-                title = ''.join(data.xpath(title_xpath))
+                    cover_url = ''.join(data.xpath(cover_xpath))
-                authors = ''.join(data.xpath(author_xpath))
+                    title = ''.join(data.xpath(title_xpath))
                authors = re.sub('^' + self.author_article, '', authors)
                authors = re.sub(self.and_word, ' & ', authors)
                mo = re.match(r'(.*)(\(\d.*)$', authors)
                if mo:
                    authors = mo.group(1).strip()
-                price = ''.join(data.xpath(price_xpath))
+                    authors = ''.join(data.xpath(author_xpath))
                    authors = re.sub('^' + self.author_article, '', authors)
                    authors = re.sub(self.and_word, ' & ', authors)
                    mo = re.match(r'(.*)(\(\d.*)$', authors)
                    if mo:
                        authors = mo.group(1).strip()
-                counter -= 1
+                    price = ''.join(data.xpath(price_xpath))
-                s = SearchResult()
+                    counter -= 1
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = authors.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Kindle'
-                yield s
+                    s = SearchResult()
                    s.cover_url = cover_url.strip()
                    s.title = title.strip()
                    s.author = authors.strip()
                    s.price = price.strip()
                    s.detail_item = asin.strip()
                    s.drm = SearchResult.DRM_UNKNOWN
                    s.formats = 'Kindle'
                    yield s
    def get_details(self, search_result, timeout):
        '''
        Placeholder that performs no work.

        The body is empty: neither ``search_result`` nor ``timeout`` is read,
        nothing is modified, and ``None`` is returned implicitly.
        '''
        pass