From 21346e3e20d3bc04df3dba1a6bc29e961f24c4ee Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Dec 2015 10:06:33 +0530 Subject: [PATCH] Get Books: Add store plugins for Amazon Australia and Amazon India. Fixes #1526089 [Feature request - please add Amazon.au](https://bugs.launchpad.net/calibre/+bug/1526089) --- src/calibre/customize/builtins.py | 25 +- .../gui2/store/stores/amazon_au_plugin.py | 141 +++++++++++ .../gui2/store/stores/amazon_ca_plugin.py | 226 +++++++++--------- .../gui2/store/stores/amazon_in_plugin.py | 143 +++++++++++ .../gui2/store/stores/amazon_plugin.py | 134 +++-------- 5 files changed, 445 insertions(+), 224 deletions(-) create mode 100644 src/calibre/gui2/store/stores/amazon_au_plugin.py create mode 100644 src/calibre/gui2/store/stores/amazon_in_plugin.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index a39a37655f..ceda2d48f8 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1276,15 +1276,32 @@ class StoreSonyAUStore(StoreSonyStore): actual_plugin = 'calibre.gui2.store.stores.sony_au_plugin:SonyStore' headquarters = 'AU' +class StoreAmazonAUKindleStore(StoreBase): + name = 'Amazon AU Kindle' + author = u'Kovid Goyal' + description = u'Kindle books from Amazon.' + actual_plugin = 'calibre.gui2.store.stores.amazon_au_plugin:AmazonKindleStore' + + headquarters = 'AU' + formats = ['KINDLE'] + class StoreAmazonCAKindleStore(StoreBase): name = 'Amazon CA Kindle' - author = u'Tomasz Długosz' + author = u'Kovid Goyal' description = u'Kindle books from Amazon.' - actual_plugin = 'calibre.gui2.store.stores.amazon_ca_plugin:AmazonCAKindleStore' + actual_plugin = 'calibre.gui2.store.stores.amazon_ca_plugin:AmazonKindleStore' headquarters = 'CA' formats = ['KINDLE'] - # affiliate = True + +class StoreAmazonINKindleStore(StoreBase): + name = 'Amazon IN Kindle' + author = u'Kovid Goyal' + description = u'Kindle books from Amazon.' 
+ actual_plugin = 'calibre.gui2.store.stores.amazon_in_plugin:AmazonKindleStore' + + headquarters = 'IN' + formats = ['KINDLE'] # Remove this plugin definition in Jan 2016 because of Amazon demand. We leave # it here until then so users have a chance to see wny the store is gone. @@ -1750,7 +1767,9 @@ plugins += [ StoreBubokPublishingStore, StoreBubokPortugalStore, StoreAmazonKindleStore, + StoreAmazonAUKindleStore, StoreAmazonCAKindleStore, + StoreAmazonINKindleStore, StoreAmazonDEKindleStore, StoreAmazonESKindleStore, StoreAmazonFRKindleStore, diff --git a/src/calibre/gui2/store/stores/amazon_au_plugin.py b/src/calibre/gui2/store/stores/amazon_au_plugin.py new file mode 100644 index 0000000000..4acf286d8c --- /dev/null +++ b/src/calibre/gui2/store/stores/amazon_au_plugin.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal + +from __future__ import (unicode_literals, division, absolute_import, + print_function) +store_version = 1 # Needed for dynamic plugin loading + +from contextlib import closing +import urllib + +from lxml import html + +from PyQt5.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url +from calibre.gui2.store import StorePlugin +from calibre.gui2.store.search_result import SearchResult + +SEARCH_BASE_URL = 'http://www.amazon.com.au/s/' +SEARCH_BASE_QUERY = {'url': 'search-alias=digital-text'} +DETAILS_URL = 'http://amazon.com.au/dp/' +STORE_LINK = 'http://www.amazon.com.au' +DRM_SEARCH_TEXT = 'Simultaneous Device Usage' +DRM_FREE_TEXT = 'Unlimited' + +def search_amazon(query, max_results=10, timeout=60, + write_html_to=None, + base_url=SEARCH_BASE_URL, + base_query=SEARCH_BASE_QUERY, + field_keywords='field-keywords' + ): + uquery = base_query.copy() + uquery[field_keywords] = query + def asbytes(x): + if isinstance(x, type('')): + x = x.encode('utf-8') + return x + uquery = {asbytes(k):asbytes(v) for k, v in uquery.iteritems()} + url = base_url + '?' 
+ urllib.urlencode(uquery).decode('ascii') + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + raw = f.read() + if write_html_to is not None: + with open(write_html_to, 'wb') as f: + f.write(raw) + doc = html.fromstring(raw) + try: + results = doc.xpath('//div[@id="atfResults" and @class]')[0] + except IndexError: + return + + if 's-result-list-parent-container' in results.get('class', ''): + data_xpath = "descendant-or-self::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-result-item ')]" + format_xpath = './/a[@title="Kindle Edition"]/@title' + asin_xpath = '@data-asin' + cover_xpath = "descendant-or-self::img[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-image ')]/@src" + title_xpath = "descendant-or-self::h2[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-title ')]//text()" + author_xpath = './/span[starts-with(text(), "by ")]/following-sibling::span//text()' + price_xpath = '(.//span[contains(@class, " s-price ")])[last()]//text()' + else: + return + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). So we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. 
+ asin = data.xpath(asin_xpath) + if asin: + asin = asin[0] + else: + continue + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath(title_xpath)) + author = ''.join(data.xpath(author_xpath)) + try: + author = author.split('by ', 1)[1].split(" (")[0] + except: + pass + + price = ''.join(data.xpath(price_xpath)) + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + + yield s + +class AmazonKindleStore(StorePlugin): + + def open(self, parent=None, detail_item=None, external=False): + store_link = (DETAILS_URL + detail_item) if detail_item else STORE_LINK + open_url(QUrl(store_link)) + + def search(self, query, max_results=10, timeout=60): + for result in search_amazon(query, max_results=max_results, timeout=timeout): + yield result + + def get_details(self, search_result, timeout): + url = DETAILS_URL + + br = browser() + with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: + idata = html.fromstring(nf.read()) + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + + DRM_SEARCH_TEXT + '")])'): + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + + DRM_FREE_TEXT + '") and contains(b, "' + + DRM_SEARCH_TEXT + '")])'): + search_result.drm = SearchResult.DRM_UNLOCKED + else: + search_result.drm = SearchResult.DRM_UNKNOWN + else: + search_result.drm = SearchResult.DRM_LOCKED + return True + +if __name__ == '__main__': + import sys + for result in search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'): + print (result) diff --git a/src/calibre/gui2/store/stores/amazon_ca_plugin.py b/src/calibre/gui2/store/stores/amazon_ca_plugin.py index 5f7554e361..543985a151 100644 --- a/src/calibre/gui2/store/stores/amazon_ca_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_ca_plugin.py @@ -1,13 +1,13 @@ -# -*- coding: utf-8 
-*- +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal -from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading - -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' +from __future__ import (unicode_literals, division, absolute_import, + print_function) +store_version = 3 # Needed for dynamic plugin loading from contextlib import closing +import urllib from lxml import html @@ -18,134 +18,124 @@ from calibre.gui2 import open_url from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult -class AmazonCAKindleStore(StorePlugin): - ''' - For comments on the implementation, please see amazon_plugin.py - ''' +SEARCH_BASE_URL = 'http://www.amazon.ca/s/' +SEARCH_BASE_QUERY = {'url': 'search-alias=digital-text'} +DETAILS_URL = 'http://amazon.ca/dp/' +STORE_LINK = 'http://www.amazon.ca' +DRM_SEARCH_TEXT = 'Simultaneous Device Usage' +DRM_FREE_TEXT = 'Unlimited' - search_url = 'http://www.amazon.ca/s/url=search-alias%3Ddigital-text&field-keywords=' - details_url = 'http://amazon.ca/dp/' - drm_search_text = u'Simultaneous Device Usage' - drm_free_text = u'Unlimited' +def search_amazon(query, max_results=10, timeout=60, + write_html_to=None, + base_url=SEARCH_BASE_URL, + base_query=SEARCH_BASE_QUERY, + field_keywords='field-keywords' + ): + uquery = base_query.copy() + uquery[field_keywords] = query + def asbytes(x): + if isinstance(x, type('')): + x = x.encode('utf-8') + return x + uquery = {asbytes(k):asbytes(v) for k, v in uquery.iteritems()} + url = base_url + '?' 
+ urllib.urlencode(uquery).decode('ascii') + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + raw = f.read() + if write_html_to is not None: + with open(write_html_to, 'wb') as f: + f.write(raw) + doc = html.fromstring(raw) + try: + results = doc.xpath('//div[@id="atfResults" and @class]')[0] + except IndexError: + return + + if 's-result-list-parent-container' in results.get('class', ''): + data_xpath = "descendant-or-self::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-result-item ')]" + format_xpath = './/a[@title="Kindle Edition"]/@title' + asin_xpath = '@data-asin' + cover_xpath = "descendant-or-self::img[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-image ')]/@src" + title_xpath = "descendant-or-self::h2[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-title ')]//text()" + author_xpath = './/span[starts-with(text(), "by ")]/following-sibling::span//text()' + price_xpath = '(.//span[contains(@class, " s-price ")])[last()]//text()' + else: + return + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). So we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. 
+ asin = data.xpath(asin_xpath) + if asin: + asin = asin[0] + else: + continue + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath(title_xpath)) + author = ''.join(data.xpath(author_xpath)) + try: + author = author.split('by ', 1)[1].split(" (")[0] + except: + pass + + price = ''.join(data.xpath(price_xpath)) + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + + yield s + +class AmazonKindleStore(StorePlugin): def open(self, parent=None, detail_item=None, external=False): - #aff_id = {'tag': ''} - # Use Kovid's affiliate id 30% of the time. - # if random.randint(1, 10) in (1, 2, 3): - # aff_id['tag'] = 'calibrebs-20' - # store_link = 'http://www.amazon.ca/Kindle-eBooks/b/?ie=UTF&node=1286228011&ref_=%(tag)s&ref=%(tag)s&tag=%(tag)s&linkCode=ur2&camp=1789&creative=390957' % aff_id - store_link = 'http://www.amazon.ca/ebooks-kindle/b/ref=sa_menu_kbo?ie=UTF8&node=2980423011' - if detail_item: - # aff_id['asin'] = detail_item - # store_link = 'http://www.amazon.ca/dp/%(asin)s/?tag=%(tag)s' % aff_id - store_link = 'http://www.amazon.ca/dp/' + detail_item + '/' + store_link = (DETAILS_URL + detail_item) if detail_item else STORE_LINK open_url(QUrl(store_link)) def search(self, query, max_results=10, timeout=60): - url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+') - br = browser() - - counter = max_results - with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) - - if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'): - data_xpath = '//div[contains(@class, "prod")]' - format_xpath = ( - './/ul[contains(@class, "rsltGridList")]' - '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()') - asin_xpath = '@name' - cover_xpath = './/img[contains(@class, 
"productImage")]/@src' - title_xpath = './/h3[@class="newaps"]/a//text()' - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' - price_xpath = ( - './/ul[contains(@class, "rsltGridList")]' - '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()') - elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'): - data_xpath = '//li[(@class="ilo")]' - format_xpath = ( - './/ul[contains(@class, "rsltGridList")]' - '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()') - asin_xpath = '@name' - cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src' - title_xpath = './/h3[@class="newaps"]/a//text()' - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' - # Results can be in a grid (table) or a column - price_xpath = ( - './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]' - '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()') - elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'): - data_xpath = '//div[contains(@class, "prod")]' - format_xpath = ( - './/ul[contains(@class, "rsltL")]' - '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()') - asin_xpath = '@name' - cover_xpath = './/img[contains(@class, "productImage")]/@src' - title_xpath = './/h3[@class="newaps"]/a//text()' - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' - price_xpath = ( - './/ul[contains(@class, "rsltL")]' - '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()') - else: - return - - for data in doc.xpath(data_xpath): - if counter <= 0: - break - - # Even though we are searching digital-text only Amazon will still - # put in results for non Kindle books (author pages). Se we need - # to explicitly check if the item is a Kindle book and ignore it - # if it isn't. 
- format = ''.join(data.xpath(format_xpath)) - if 'kindle' not in format.lower(): - continue - - # We must have an asin otherwise we can't easily reference the - # book later. - asin = data.xpath(asin_xpath) - if asin: - asin = asin[0] - else: - continue - - cover_url = ''.join(data.xpath(cover_xpath)) - - title = ''.join(data.xpath(title_xpath)) - author = ''.join(data.xpath(author_xpath)) - try: - author = author.split('by ', 1)[1].split(" (")[0] - except: - pass - - price = ''.join(data.xpath(price_xpath)) - - counter -= 1 - - s = SearchResult() - s.cover_url = cover_url.strip() - s.title = title.strip() - s.author = author.strip() - s.price = price.strip() - s.detail_item = asin.strip() - s.formats = 'Kindle' - - yield s + for result in search_amazon(query, max_results=max_results, timeout=timeout): + yield result def get_details(self, search_result, timeout): - url = self.details_url + url = DETAILS_URL br = browser() with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: idata = html.fromstring(nf.read()) if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + - self.drm_search_text + '")])'): + DRM_SEARCH_TEXT + '")])'): if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + - self.drm_free_text + '") and contains(b, "' + - self.drm_search_text + '")])'): + DRM_FREE_TEXT + '") and contains(b, "' + + DRM_SEARCH_TEXT + '")])'): search_result.drm = SearchResult.DRM_UNLOCKED else: search_result.drm = SearchResult.DRM_UNKNOWN else: search_result.drm = SearchResult.DRM_LOCKED return True + +if __name__ == '__main__': + import sys + for result in search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'): + print (result) diff --git a/src/calibre/gui2/store/stores/amazon_in_plugin.py b/src/calibre/gui2/store/stores/amazon_in_plugin.py new file mode 100644 index 0000000000..1f6afa85d5 --- /dev/null +++ b/src/calibre/gui2/store/stores/amazon_in_plugin.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python2 +# 
vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal + +from __future__ import (unicode_literals, division, absolute_import, + print_function) +store_version = 1 # Needed for dynamic plugin loading + +from contextlib import closing +import urllib + +from lxml import html + +from PyQt5.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url +from calibre.gui2.store import StorePlugin +from calibre.gui2.store.search_result import SearchResult + +SEARCH_BASE_URL = 'http://www.amazon.in/s/' +SEARCH_BASE_QUERY = {'url': 'search-alias=digital-text'} +DETAILS_URL = 'http://amazon.in/dp/' +STORE_LINK = 'http://www.amazon.in' +DRM_SEARCH_TEXT = 'Simultaneous Device Usage' +DRM_FREE_TEXT = 'Unlimited' + +def search_amazon(query, max_results=10, timeout=60, + write_html_to=None, + base_url=SEARCH_BASE_URL, + base_query=SEARCH_BASE_QUERY, + field_keywords='field-keywords' + ): + uquery = base_query.copy() + uquery[field_keywords] = query + def asbytes(x): + if isinstance(x, type('')): + x = x.encode('utf-8') + return x + uquery = {asbytes(k):asbytes(v) for k, v in uquery.iteritems()} + url = base_url + '?' 
+ urllib.urlencode(uquery).decode('ascii') + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + raw = f.read() + if write_html_to is not None: + with open(write_html_to, 'wb') as f: + f.write(raw) + doc = html.fromstring(raw) + try: + results = doc.xpath('//div[@id="atfResults" and @class]')[0] + except IndexError: + return + + if 's-result-list-parent-container' in results.get('class', ''): + data_xpath = "descendant-or-self::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-result-item ')]" + format_xpath = './/a[@title="Kindle Edition"]/@title' + asin_xpath = '@data-asin' + cover_xpath = "descendant-or-self::img[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-image ')]/@src" + title_xpath = "descendant-or-self::h2[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-title ')]//text()" + author_xpath = './/span[starts-with(text(), "by ")]/following-sibling::span//text()' + price_xpath = '(.//span[contains(@class, " s-price ")])[last()]//text()' + else: + return + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). So we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. 
+ asin = data.xpath(asin_xpath) + if asin: + asin = asin[0] + else: + continue + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath(title_xpath)) + author = ''.join(data.xpath(author_xpath)) + try: + author = author.split('by ', 1)[1].split(" (")[0] + except: + pass + + price = ''.join(data.xpath(price_xpath)) + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + if s.price: + s.price = '₹ ' + s.price + s.detail_item = asin.strip() + s.formats = 'Kindle' + + yield s + +class AmazonKindleStore(StorePlugin): + + def open(self, parent=None, detail_item=None, external=False): + store_link = (DETAILS_URL + detail_item) if detail_item else STORE_LINK + open_url(QUrl(store_link)) + + def search(self, query, max_results=10, timeout=60): + for result in search_amazon(query, max_results=max_results, timeout=timeout): + yield result + + def get_details(self, search_result, timeout): + url = DETAILS_URL + + br = browser() + with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: + idata = html.fromstring(nf.read()) + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + + DRM_SEARCH_TEXT + '")])'): + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + + DRM_FREE_TEXT + '") and contains(b, "' + + DRM_SEARCH_TEXT + '")])'): + search_result.drm = SearchResult.DRM_UNLOCKED + else: + search_result.drm = SearchResult.DRM_UNKNOWN + else: + search_result.drm = SearchResult.DRM_LOCKED + return True + +if __name__ == '__main__': + import sys + for result in search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'): + print (result) diff --git a/src/calibre/gui2/store/stores/amazon_plugin.py b/src/calibre/gui2/store/stores/amazon_plugin.py index 4a32bf68bf..afb06ecdc5 100644 --- a/src/calibre/gui2/store/stores/amazon_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_plugin.py @@ -1,13 
+1,13 @@ -# -*- coding: utf-8 -*- +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal -from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 10 # Needed for dynamic plugin loading - -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' +from __future__ import (unicode_literals, division, absolute_import, + print_function) +store_version = 11 # Needed for dynamic plugin loading from contextlib import closing +import urllib from lxml import html @@ -18,10 +18,27 @@ from calibre.gui2 import open_url from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult +SEARCH_BASE_URL = 'http://www.amazon.com/s/' +SEARCH_BASE_QUERY = {'url': 'search-alias=digital-text'} +DETAILS_URL = 'http://amazon.com/dp/' +STORE_LINK = 'http://www.amazon.com/Kindle-eBooks' +DRM_SEARCH_TEXT = 'Simultaneous Device Usage' +DRM_FREE_TEXT = 'Unlimited' + def search_amazon(query, max_results=10, timeout=60, write_html_to=None, - search_url='http://www.amazon.com/s/?url=search-alias%3Ddigital-text&field-keywords='): - url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+') + base_url=SEARCH_BASE_URL, + base_query=SEARCH_BASE_QUERY, + field_keywords='field-keywords' + ): + uquery = base_query.copy() + uquery[field_keywords] = query + def asbytes(x): + if isinstance(x, type('')): + x = x.encode('utf-8') + return x + uquery = {asbytes(k):asbytes(v) for k, v in uquery.iteritems()} + url = base_url + '?' 
+ urllib.urlencode(uquery).decode('ascii') br = browser() counter = max_results @@ -129,97 +146,8 @@ def search_amazon(query, max_results=10, timeout=60, class AmazonKindleStore(StorePlugin): - details_url = 'http://amazon.com/dp/' - drm_search_text = u'Simultaneous Device Usage' - drm_free_text = u'Unlimited' - def open(self, parent=None, detail_item=None, external=False): - ''' - Amazon comes with a number of difficulties. - - QWebView has major issues with Amazon.com. The largest of - issues is it simply doesn't work on a number of pages. - - When connecting to a number parts of Amazon.com (Kindle library - for instance) QNetworkAccessManager fails to connect with a - NetworkError of 399 - ProtocolFailure. The strange thing is, - when I check QNetworkRequest.HttpStatusCodeAttribute when the - 399 error is returned the status code is 200 (Ok). However, once - the QNetworkAccessManager decides there was a NetworkError it - does not download the page from Amazon. So I can't even set the - HTML in the QWebView myself. - - There is http://bugreports.qt.nokia.com/browse/QTWEBKIT-259 an - open bug about the issue but it is not correct. We can set the - useragent (Arora does) to something else and the above issue - will persist. This http://developer.qt.nokia.com/forums/viewthread/793 - gives a bit more information about the issue but as of now (27/Feb/2011) - there is no solution or work around. - - We cannot change the The linkDelegationPolicy to allow us to avoid - QNetworkAccessManager because it only works links. Forms aren't - included so the same issue persists on any part of the site (login) - that use a form to load a new page. - - Using an aStore was evaluated but I've decided against using it. - There are three major issues with an aStore. Because checkout is - handled by sending the user to Amazon we can't put it in a QWebView. - If we're sending the user to Amazon sending them there directly is - nicer. 
Also, we cannot put the aStore in a QWebView and let it open the - redirection the users default browser because the cookies with the - shopping cart won't transfer. - - Another issue with the aStore is how it handles the referral. It only - counts the referral for the items in the shopping card / the item - that directed the user to Amazon. Kindle books do not use the shopping - cart and send the user directly to Amazon for the purchase. In this - instance we would only get referral credit for the one book that the - aStore directs to Amazon that the user buys. Any other purchases we - won't get credit for. - - The last issue with the aStore is performance. Even though it's an - Amazon site it's alow. So much slower than Amazon.com that it makes - me not want to browse books using it. The look and feel are lesser - issues. So is the fact that it almost seems like the purchase is - with calibre. This can cause some support issues because we can't - do much for issues with Amazon.com purchase hiccups. - - Another option that was evaluated was the Product Advertising API. - The reasons against this are complexity. It would take a lot of work - to basically re-create Amazon.com within calibre. The Product - Advertising API is also designed with being run on a server not - in an app. The signing keys would have to be made avaliable to ever - calibre user which means bad things could be done with our account. - - The Product Advertising API also assumes the same browser for easy - shopping cart transfer to Amazon. With QWebView not working and there - not being an easy way to transfer cookies between a QWebView and the - users default browser this won't work well. - - We could create our own website on the calibre server and create an - Amazon Product Advertising API store. However, this goes back to the - complexity argument. 
Why spend the time recreating Amazon.com - - The final and largest issue against using the Product Advertising API - is the Efficiency Guidelines: - - "Each account used to access the Product Advertising API will be allowed - an initial usage limit of 2,000 requests per hour. Each account will - receive an additional 500 requests per hour (up to a maximum of 25,000 - requests per hour) for every $1 of shipped item revenue driven per hour - in a trailing 30-day period. Usage thresholds are recalculated daily based - on revenue performance." - - With over two million users a limit of 2,000 request per hour could - render our store unusable for no other reason than Amazon rate - limiting our traffic. - - The best (I use the term lightly here) solution is to open Amazon.com - in the users default browser and set the affiliate id as part of the url. - ''' - store_link = 'http://www.amazon.com/Kindle-eBooks' - if detail_item: - store_link = 'http://www.amazon.com/dp/%s' % detail_item + store_link = (DETAILS_URL + detail_item) if detail_item else STORE_LINK open_url(QUrl(store_link)) def search(self, query, max_results=10, timeout=60): @@ -227,16 +155,16 @@ class AmazonKindleStore(StorePlugin): yield result def get_details(self, search_result, timeout): - url = self.details_url + url = DETAILS_URL br = browser() with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: idata = html.fromstring(nf.read()) if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + - self.drm_search_text + '")])'): + DRM_SEARCH_TEXT + '")])'): if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + - self.drm_free_text + '") and contains(b, "' + - self.drm_search_text + '")])'): + DRM_FREE_TEXT + '") and contains(b, "' + + DRM_SEARCH_TEXT + '")])'): search_result.drm = SearchResult.DRM_UNLOCKED else: search_result.drm = SearchResult.DRM_UNKNOWN