From ae724b9ef686a3e314a6e8307adb5688dd610a64 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Mon, 6 Jun 2011 12:20:05 +0100 Subject: [PATCH] Split amazon UK and DE away from US, making each a separate plugin --- src/calibre/gui2/store/amazon_de_plugin.py | 107 +++++++++++++++++++-- src/calibre/gui2/store/amazon_uk_plugin.py | 12 +-- 2 files changed, 105 insertions(+), 14 deletions(-) diff --git a/src/calibre/gui2/store/amazon_de_plugin.py b/src/calibre/gui2/store/amazon_de_plugin.py index f7b17a2e83..88ccbdbded 100644 --- a/src/calibre/gui2/store/amazon_de_plugin.py +++ b/src/calibre/gui2/store/amazon_de_plugin.py @@ -6,21 +6,23 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +import re, urllib +from contextlib import closing + +from lxml import html + from PyQt4.Qt import QUrl +from calibre import browser from calibre.gui2 import open_url -from calibre.gui2.store.amazon_plugin import AmazonKindleStore +from calibre.gui2.store import StorePlugin +from calibre.gui2.store.search_result import SearchResult -class AmazonDEKindleStore(AmazonKindleStore): +class AmazonDEKindleStore(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' - search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords=' - details_url = 'http://amazon.de/dp/' - drm_search_text = u'Gleichzeitige Verwendung von Geräten' - drm_free_text = u'Keine Einschränkung' - def open(self, parent=None, detail_item=None, external=False): aff_id = {'tag': 'charhale0a-21'} store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de' @@ -32,3 +34,94 @@ class AmazonDEKindleStore(AmazonKindleStore): '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de' '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') % aff_id open_url(QUrl(store_link)) + + def search(self, query, max_results=10, timeout=60): + search_url = 
'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords=' + url = search_url + urllib.quote_plus(query) + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + doc = html.fromstring(f.read()) + + # Amazon has two results pages. + is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])') + # Horizontal grid of books. + if is_shot: + data_xpath = '//div[contains(@class, "result")]' + format_xpath = './/div[@class="productTitle"]/text()' + cover_xpath = './/div[@class="productTitle"]//img/@src' + # Vertical list of books. + else: + data_xpath = '//div[@class="productData"]' + format_xpath = './/span[@class="format"]/text()' + cover_xpath = '../div[@class="productImage"]/a/img/@src' + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). So we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later.
+ asin_href = None + asin_a = data.xpath('.//div[@class="productTitle"]/a[1]') + if asin_a: + asin_href = asin_a[0].get('href', '') + m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href) + if m: + asin = m.group('asin') + else: + continue + else: + continue + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()')) + price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')) + + if is_shot: + author = format.split(' von ')[-1] + else: + author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()')) + author = author.split(' von ')[-1] + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + + yield s + + def get_details(self, search_result, timeout): + drm_search_text = u'Gleichzeitige Verwendung von Geräten' + drm_free_text = u'Keine Einschränkung' + url = 'http://amazon.de/dp/' + + br = browser() + with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: + idata = html.fromstring(nf.read()) + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + + drm_search_text + '")])'): + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + + drm_free_text + '") and contains(b, "' + + drm_search_text + '")])'): + search_result.drm = SearchResult.DRM_UNLOCKED + else: + search_result.drm = SearchResult.DRM_UNKNOWN + else: + search_result.drm = SearchResult.DRM_LOCKED + return True diff --git a/src/calibre/gui2/store/amazon_uk_plugin.py b/src/calibre/gui2/store/amazon_uk_plugin.py index a922f0516b..fcc0c02e01 100644 --- a/src/calibre/gui2/store/amazon_uk_plugin.py +++ b/src/calibre/gui2/store/amazon_uk_plugin.py @@ -15,17 +15,14 @@ from PyQt4.Qt import QUrl from calibre import browser from calibre.gui2 import open_url -from calibre.gui2.store.amazon_plugin import
AmazonKindleStore +from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult -class AmazonUKKindleStore(AmazonKindleStore): +class AmazonUKKindleStore(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' - search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords=' - details_url = 'http://amazon.co.uk/dp/' - def open(self, parent=None, detail_item=None, external=False): aff_id = {'tag': 'calcharles-21'} store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id @@ -36,7 +33,8 @@ class AmazonUKKindleStore(AmazonKindleStore): open_url(QUrl(store_link)) def search(self, query, max_results=10, timeout=60): - url = self.search_url + urllib.quote_plus(query) + search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords=' + url = search_url + urllib.quote_plus(query) br = browser() counter = max_results @@ -95,7 +93,7 @@ class AmazonUKKindleStore(AmazonKindleStore): if search_result.drm: return - url = self.details_url + url = 'http://amazon.co.uk/dp/' br = browser() with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: