diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index ceda2d48f8..412998f292 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1303,65 +1303,50 @@ class StoreAmazonINKindleStore(StoreBase): headquarters = 'IN' formats = ['KINDLE'] -# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave -# it here until then so users have a chance to see wny the store is gone. class StoreAmazonDEKindleStore(StoreBase): name = 'Amazon DE Kindle' - author = 'Charles Haley' + author = 'Kovid Goyal' description = u'Kindle Bücher von Amazon.' - actual_plugin = 'calibre.gui2.store.stores.amazon_de_plugin:AmazonDEKindleStore' + actual_plugin = 'calibre.gui2.store.stores.amazon_de_plugin:AmazonKindleStore' headquarters = 'DE' formats = ['KINDLE'] - affiliate = True -# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave -# it here until then so users have a chance to see wny the store is gone. class StoreAmazonFRKindleStore(StoreBase): name = 'Amazon FR Kindle' - author = 'Charles Haley' + author = 'Kovid Goyal' description = u'Tous les ebooks Kindle' - actual_plugin = 'calibre.gui2.store.stores.amazon_fr_plugin:AmazonFRKindleStore' + actual_plugin = 'calibre.gui2.store.stores.amazon_fr_plugin:AmazonKindleStore' headquarters = 'FR' formats = ['KINDLE'] - affiliate = True -# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave -# it here until then so users have a chance to see wny the store is gone. class StoreAmazonITKindleStore(StoreBase): name = 'Amazon IT Kindle' - author = 'Charles Haley' + author = 'Kovid Goyal' description = u'eBook Kindle a prezzi incredibili' - actual_plugin = 'calibre.gui2.store.stores.amazon_it_plugin:AmazonITKindleStore' + actual_plugin = 'calibre.gui2.store.stores.amazon_it_plugin:AmazonKindleStore' headquarters = 'IT' formats = ['KINDLE'] - affiliate = True -# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave -# it here until then so users have a chance to see wny the store is gone. class StoreAmazonESKindleStore(StoreBase): name = 'Amazon ES Kindle' - author = 'Charles Haley' + author = 'Kovid Goyal' description = u'eBook Kindle en España' - actual_plugin = 'calibre.gui2.store.stores.amazon_es_plugin:AmazonESKindleStore' + actual_plugin = 'calibre.gui2.store.stores.amazon_es_plugin:AmazonKindleStore' headquarters = 'ES' formats = ['KINDLE'] - affiliate = True -# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave -# it here until then so users have a chance to see wny the store is gone. class StoreAmazonUKKindleStore(StoreBase): name = 'Amazon UK Kindle' - author = 'Charles Haley' + author = 'Kovid Goyal' description = u'Kindle books from Amazon\'s UK web site. Also, includes French language ebooks.' - actual_plugin = 'calibre.gui2.store.stores.amazon_uk_plugin:AmazonUKKindleStore' + actual_plugin = 'calibre.gui2.store.stores.amazon_uk_plugin:AmazonKindleStore' headquarters = 'UK' formats = ['KINDLE'] - affiliate = True class StoreArchiveOrgStore(StoreBase): name = 'Archive.org' diff --git a/src/calibre/gui2/store/stores/amazon_de_plugin.py b/src/calibre/gui2/store/stores/amazon_de_plugin.py index bf1896dcdb..4bd191f806 100644 --- a/src/calibre/gui2/store/stores/amazon_de_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py @@ -1,38 +1,143 @@ -# -*- coding: utf-8 -*- +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal -from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 10 # Needed for dynamic plugin loading +from __future__ import (unicode_literals, division, absolute_import, + print_function) +store_version = 11 # Needed for dynamic plugin loading -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' +from contextlib import closing +import urllib +from lxml import html + +from PyQt5.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult -class AmazonDEKindleStore(StorePlugin): - ''' - Amazon forcibly closed the affiliate account, requesting that "all links - toward Amazon content be removed". - ''' +SEARCH_BASE_URL = 'http://www.amazon.de/s/' +SEARCH_BASE_QUERY = {'url': 'search-alias=digital-text'} +BY = 'von' +KINDLE_EDITION = 'Kindle Edition' +DETAILS_URL = 'http://amazon.de/dp/' +STORE_LINK = 'http://www.amazon.de' +DRM_SEARCH_TEXT = 'Simultaneous Device Usage' +DRM_FREE_TEXT = 'Unlimited' - def genesis(self): - StorePlugin.genesis(self) - from calibre.customize.ui import find_plugin - pi = find_plugin('Amazon DE Kindle') - pi.affiliate = False +def search_amazon(query, max_results=10, timeout=60, + write_html_to=None, + base_url=SEARCH_BASE_URL, + base_query=SEARCH_BASE_QUERY, + field_keywords='field-keywords' + ): + uquery = base_query.copy() + uquery[field_keywords] = query + def asbytes(x): + if isinstance(x, type('')): + x = x.encode('utf-8') + return x + uquery = {asbytes(k):asbytes(v) for k, v in uquery.iteritems()} + url = base_url + '?' + urllib.urlencode(uquery).decode('ascii') + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + raw = f.read() + if write_html_to is not None: + with open(write_html_to, 'wb') as f: + f.write(raw) + doc = html.fromstring(raw) + try: + results = doc.xpath('//div[@id="atfResults" and @class]')[0] + except IndexError: + return + + if 's-result-list-parent-container' in results.get('class', ''): + data_xpath = "descendant-or-self::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-result-item ')]" + format_xpath = './/a[contains(text(), "%s")]//text()' % KINDLE_EDITION + asin_xpath = '@data-asin' + cover_xpath = "descendant-or-self::img[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-image ')]/@src" + title_xpath = "descendant-or-self::h2[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-title ')]//text()" + author_xpath = './/span[starts-with(text(), "%s ")]/following-sibling::span//text()' % BY + price_xpath = '(.//span[contains(@class, " s-price ")])[last()]//text()' + else: + return + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). Se we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. + asin = data.xpath(asin_xpath) + if asin: + asin = asin[0] + else: + continue + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath(title_xpath)) + author = ''.join(data.xpath(author_xpath)) + try: + author = author.split('by ', 1)[1].split(" (")[0] + except: + pass + + price = ''.join(data.xpath(price_xpath)) + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + + yield s + +class AmazonKindleStore(StorePlugin): def open(self, parent=None, detail_item=None, external=False): - pass + store_link = (DETAILS_URL + detail_item) if detail_item else STORE_LINK + open_url(QUrl(store_link)) def search(self, query, max_results=10, timeout=60): - s = SearchResult() - s.title = 'Amazon required that this
store be permanently closed.' - s.author = '' - s.price = '' - s.detail_item = '' - s.drm = SearchResult.DRM_UNKNOWN - yield s + for result in search_amazon(query, max_results=max_results, timeout=timeout): + yield result def get_details(self, search_result, timeout): - pass \ No newline at end of file + url = DETAILS_URL + + br = browser() + with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: + idata = html.fromstring(nf.read()) + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + + DRM_SEARCH_TEXT + '")])'): + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + + DRM_FREE_TEXT + '") and contains(b, "' + + DRM_SEARCH_TEXT + '")])'): + search_result.drm = SearchResult.DRM_UNLOCKED + else: + search_result.drm = SearchResult.DRM_UNKNOWN + else: + search_result.drm = SearchResult.DRM_LOCKED + return True + +if __name__ == '__main__': + import sys + for result in search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'): + print (result) diff --git a/src/calibre/gui2/store/stores/amazon_es_plugin.py b/src/calibre/gui2/store/stores/amazon_es_plugin.py index 8e434f9e04..9b07163106 100644 --- a/src/calibre/gui2/store/stores/amazon_es_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py @@ -1,38 +1,143 @@ -# -*- coding: utf-8 -*- +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal -from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 10 # Needed for dynamic plugin loading +from __future__ import (unicode_literals, division, absolute_import, + print_function) +store_version = 11 # Needed for dynamic plugin loading -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' +from contextlib import closing +import urllib +from lxml import html + +from PyQt5.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult -class AmazonESKindleStore(StorePlugin): - ''' - Amazon forcibly closed the affiliate account, requesting that "all links - toward Amazon content be removed". - ''' +SEARCH_BASE_URL = 'http://www.amazon.es/s/' +SEARCH_BASE_QUERY = {'url': 'search-alias=digital-text'} +BY = 'de' +KINDLE_EDITION = 'Versión Kindle' +DETAILS_URL = 'http://amazon.es/dp/' +STORE_LINK = 'http://www.amazon.es' +DRM_SEARCH_TEXT = 'Simultaneous Device Usage' +DRM_FREE_TEXT = 'Unlimited' - def genesis(self): - StorePlugin.genesis(self) - from calibre.customize.ui import find_plugin - pi = find_plugin('Amazon ES Kindle') - pi.affiliate = False +def search_amazon(query, max_results=10, timeout=60, + write_html_to=None, + base_url=SEARCH_BASE_URL, + base_query=SEARCH_BASE_QUERY, + field_keywords='field-keywords' + ): + uquery = base_query.copy() + uquery[field_keywords] = query + def asbytes(x): + if isinstance(x, type('')): + x = x.encode('utf-8') + return x + uquery = {asbytes(k):asbytes(v) for k, v in uquery.iteritems()} + url = base_url + '?' + urllib.urlencode(uquery).decode('ascii') + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + raw = f.read() + if write_html_to is not None: + with open(write_html_to, 'wb') as f: + f.write(raw) + doc = html.fromstring(raw) + try: + results = doc.xpath('//div[@id="atfResults" and @class]')[0] + except IndexError: + return + + if 's-result-list-parent-container' in results.get('class', ''): + data_xpath = "descendant-or-self::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-result-item ')]" + format_xpath = './/a[@title="%s"]/@title' % KINDLE_EDITION + asin_xpath = '@data-asin' + cover_xpath = "descendant-or-self::img[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-image ')]/@src" + title_xpath = "descendant-or-self::h2[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-title ')]//text()" + author_xpath = './/span[starts-with(text(), "%s ")]/following-sibling::span//text()' % BY + price_xpath = '(.//span[contains(@class, " s-price ")])[last()]//text()' + else: + return + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). Se we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. + asin = data.xpath(asin_xpath) + if asin: + asin = asin[0] + else: + continue + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath(title_xpath)) + author = ''.join(data.xpath(author_xpath)) + try: + author = author.split('by ', 1)[1].split(" (")[0] + except: + pass + + price = ''.join(data.xpath(price_xpath)) + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + + yield s + +class AmazonKindleStore(StorePlugin): def open(self, parent=None, detail_item=None, external=False): - pass + store_link = (DETAILS_URL + detail_item) if detail_item else STORE_LINK + open_url(QUrl(store_link)) def search(self, query, max_results=10, timeout=60): - s = SearchResult() - s.title = 'Amazon required that this
store be permanently closed.' - s.author = '' - s.price = '' - s.detail_item = '' - s.drm = SearchResult.DRM_UNKNOWN - yield s + for result in search_amazon(query, max_results=max_results, timeout=timeout): + yield result def get_details(self, search_result, timeout): - pass \ No newline at end of file + url = DETAILS_URL + + br = browser() + with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: + idata = html.fromstring(nf.read()) + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + + DRM_SEARCH_TEXT + '")])'): + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + + DRM_FREE_TEXT + '") and contains(b, "' + + DRM_SEARCH_TEXT + '")])'): + search_result.drm = SearchResult.DRM_UNLOCKED + else: + search_result.drm = SearchResult.DRM_UNKNOWN + else: + search_result.drm = SearchResult.DRM_LOCKED + return True + +if __name__ == '__main__': + import sys + for result in search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'): + print (result) diff --git a/src/calibre/gui2/store/stores/amazon_fr_plugin.py b/src/calibre/gui2/store/stores/amazon_fr_plugin.py index 24d9f13f28..9684837559 100644 --- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py @@ -1,38 +1,143 @@ -# -*- coding: utf-8 -*- +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal -from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 10 # Needed for dynamic plugin loading +from __future__ import (unicode_literals, division, absolute_import, + print_function) +store_version = 11 # Needed for dynamic plugin loading -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' +from contextlib import closing +import urllib +from lxml import html + +from PyQt5.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult -class AmazonFRKindleStore(StorePlugin): - ''' - Amazon forcibly closed the affiliate account, requesting that "all links - toward Amazon content be removed". - ''' +SEARCH_BASE_URL = 'http://www.amazon.fr/s/' +SEARCH_BASE_QUERY = {'url': 'search-alias=digital-text'} +BY = 'de' +KINDLE_EDITION = 'Format Kindle' +DETAILS_URL = 'http://amazon.fr/dp/' +STORE_LINK = 'http://www.amazon.fr' +DRM_SEARCH_TEXT = 'Simultaneous Device Usage' +DRM_FREE_TEXT = 'Unlimited' - def genesis(self): - StorePlugin.genesis(self) - from calibre.customize.ui import find_plugin - pi = find_plugin('Amazon FR Kindle') - pi.affiliate = False +def search_amazon(query, max_results=10, timeout=60, + write_html_to=None, + base_url=SEARCH_BASE_URL, + base_query=SEARCH_BASE_QUERY, + field_keywords='field-keywords' + ): + uquery = base_query.copy() + uquery[field_keywords] = query + def asbytes(x): + if isinstance(x, type('')): + x = x.encode('utf-8') + return x + uquery = {asbytes(k):asbytes(v) for k, v in uquery.iteritems()} + url = base_url + '?' + urllib.urlencode(uquery).decode('ascii') + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + raw = f.read() + if write_html_to is not None: + with open(write_html_to, 'wb') as f: + f.write(raw) + doc = html.fromstring(raw) + try: + results = doc.xpath('//div[@id="atfResults" and @class]')[0] + except IndexError: + return + + if 's-result-list-parent-container' in results.get('class', ''): + data_xpath = "descendant-or-self::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-result-item ')]" + format_xpath = './/a[@title="%s"]/@title' % KINDLE_EDITION + asin_xpath = '@data-asin' + cover_xpath = "descendant-or-self::img[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-image ')]/@src" + title_xpath = "descendant-or-self::h2[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-title ')]//text()" + author_xpath = './/span[starts-with(text(), "%s ")]/following-sibling::span//text()' % BY + price_xpath = '(.//span[contains(@class, " s-price ")])[last()]//text()' + else: + return + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). Se we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. + asin = data.xpath(asin_xpath) + if asin: + asin = asin[0] + else: + continue + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath(title_xpath)) + author = ''.join(data.xpath(author_xpath)) + try: + author = author.split('by ', 1)[1].split(" (")[0] + except: + pass + + price = ''.join(data.xpath(price_xpath)) + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + + yield s + +class AmazonKindleStore(StorePlugin): def open(self, parent=None, detail_item=None, external=False): - pass + store_link = (DETAILS_URL + detail_item) if detail_item else STORE_LINK + open_url(QUrl(store_link)) def search(self, query, max_results=10, timeout=60): - s = SearchResult() - s.title = 'Amazon required that this
store be permanently closed.' - s.author = '' - s.price = '' - s.detail_item = '' - s.drm = SearchResult.DRM_UNKNOWN - yield s + for result in search_amazon(query, max_results=max_results, timeout=timeout): + yield result def get_details(self, search_result, timeout): - pass + url = DETAILS_URL + + br = browser() + with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: + idata = html.fromstring(nf.read()) + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + + DRM_SEARCH_TEXT + '")])'): + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + + DRM_FREE_TEXT + '") and contains(b, "' + + DRM_SEARCH_TEXT + '")])'): + search_result.drm = SearchResult.DRM_UNLOCKED + else: + search_result.drm = SearchResult.DRM_UNKNOWN + else: + search_result.drm = SearchResult.DRM_LOCKED + return True + +if __name__ == '__main__': + import sys + for result in search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'): + print (result) diff --git a/src/calibre/gui2/store/stores/amazon_it_plugin.py b/src/calibre/gui2/store/stores/amazon_it_plugin.py index fd44895ca6..3a2a364172 100644 --- a/src/calibre/gui2/store/stores/amazon_it_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py @@ -1,38 +1,143 @@ -# -*- coding: utf-8 -*- +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal -from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 10 # Needed for dynamic plugin loading +from __future__ import (unicode_literals, division, absolute_import, + print_function) +store_version = 11 # Needed for dynamic plugin loading -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' +from contextlib import closing +import urllib +from lxml import html + +from PyQt5.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult -class AmazonITKindleStore(StorePlugin): - ''' - Amazon forcibly closed the affiliate account, requesting that "all links - toward Amazon content be removed". - ''' +SEARCH_BASE_URL = 'http://www.amazon.it/s/' +SEARCH_BASE_QUERY = {'url': 'search-alias=digital-text'} +BY = 'di' +KINDLE_EDITION = 'Formato Kindle' +DETAILS_URL = 'http://amazon.it/dp/' +STORE_LINK = 'http://www.amazon.it' +DRM_SEARCH_TEXT = 'Simultaneous Device Usage' +DRM_FREE_TEXT = 'Unlimited' - def genesis(self): - StorePlugin.genesis(self) - from calibre.customize.ui import find_plugin - pi = find_plugin('Amazon IT Kindle') - pi.affiliate = False +def search_amazon(query, max_results=10, timeout=60, + write_html_to=None, + base_url=SEARCH_BASE_URL, + base_query=SEARCH_BASE_QUERY, + field_keywords='field-keywords' + ): + uquery = base_query.copy() + uquery[field_keywords] = query + def asbytes(x): + if isinstance(x, type('')): + x = x.encode('utf-8') + return x + uquery = {asbytes(k):asbytes(v) for k, v in uquery.iteritems()} + url = base_url + '?' + urllib.urlencode(uquery).decode('ascii') + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + raw = f.read() + if write_html_to is not None: + with open(write_html_to, 'wb') as f: + f.write(raw) + doc = html.fromstring(raw) + try: + results = doc.xpath('//div[@id="atfResults" and @class]')[0] + except IndexError: + return + + if 's-result-list-parent-container' in results.get('class', ''): + data_xpath = "descendant-or-self::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-result-item ')]" + format_xpath = './/a[@title="%s"]/@title' % KINDLE_EDITION + asin_xpath = '@data-asin' + cover_xpath = "descendant-or-self::img[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-image ')]/@src" + title_xpath = "descendant-or-self::h2[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-title ')]//text()" + author_xpath = './/span[starts-with(text(), "%s ")]/following-sibling::span//text()' % BY + price_xpath = '(.//span[contains(@class, " s-price ")])[last()]//text()' + else: + return + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). Se we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. + asin = data.xpath(asin_xpath) + if asin: + asin = asin[0] + else: + continue + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath(title_xpath)) + author = ''.join(data.xpath(author_xpath)) + try: + author = author.split('by ', 1)[1].split(" (")[0] + except: + pass + + price = ''.join(data.xpath(price_xpath)) + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + + yield s + +class AmazonKindleStore(StorePlugin): def open(self, parent=None, detail_item=None, external=False): - pass + store_link = (DETAILS_URL + detail_item) if detail_item else STORE_LINK + open_url(QUrl(store_link)) def search(self, query, max_results=10, timeout=60): - s = SearchResult() - s.title = 'Amazon required that this
store be permanently closed.' - s.author = '' - s.price = '' - s.detail_item = '' - s.drm = SearchResult.DRM_UNKNOWN - yield s + for result in search_amazon(query, max_results=max_results, timeout=timeout): + yield result def get_details(self, search_result, timeout): - pass \ No newline at end of file + url = DETAILS_URL + + br = browser() + with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: + idata = html.fromstring(nf.read()) + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + + DRM_SEARCH_TEXT + '")])'): + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + + DRM_FREE_TEXT + '") and contains(b, "' + + DRM_SEARCH_TEXT + '")])'): + search_result.drm = SearchResult.DRM_UNLOCKED + else: + search_result.drm = SearchResult.DRM_UNKNOWN + else: + search_result.drm = SearchResult.DRM_LOCKED + return True + +if __name__ == '__main__': + import sys + for result in search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'): + print (result) diff --git a/src/calibre/gui2/store/stores/amazon_uk_plugin.py b/src/calibre/gui2/store/stores/amazon_uk_plugin.py index 69e5d6ef21..1f3e2481c3 100644 --- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py @@ -1,40 +1,141 @@ -# -*- coding: utf-8 -*- +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal -from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 10 # Needed for dynamic plugin loading +from __future__ import (unicode_literals, division, absolute_import, + print_function) +store_version = 11 # Needed for dynamic plugin loading -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' +from contextlib import closing +import urllib +from lxml import html + +from PyQt5.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult +SEARCH_BASE_URL = 'http://www.amazon.co.uk/s/' +SEARCH_BASE_QUERY = {'url': 'search-alias=digital-text'} +DETAILS_URL = 'http://amazon.co.uk/dp/' +STORE_LINK = 'http://www.amazon.co.uk' +DRM_SEARCH_TEXT = 'Simultaneous Device Usage' +DRM_FREE_TEXT = 'Unlimited' +def search_amazon(query, max_results=10, timeout=60, + write_html_to=None, + base_url=SEARCH_BASE_URL, + base_query=SEARCH_BASE_QUERY, + field_keywords='field-keywords' + ): + uquery = base_query.copy() + uquery[field_keywords] = query + def asbytes(x): + if isinstance(x, type('')): + x = x.encode('utf-8') + return x + uquery = {asbytes(k):asbytes(v) for k, v in uquery.iteritems()} + url = base_url + '?' + urllib.urlencode(uquery).decode('ascii') + br = browser() -class AmazonUKKindleStore(StorePlugin): - ''' - Amazon forcibly closed the affiliate account, requesting that "all links - toward Amazon content be removed". - ''' + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + raw = f.read() + if write_html_to is not None: + with open(write_html_to, 'wb') as f: + f.write(raw) + doc = html.fromstring(raw) + try: + results = doc.xpath('//div[@id="atfResults" and @class]')[0] + except IndexError: + return - def genesis(self): - StorePlugin.genesis(self) - from calibre.customize.ui import find_plugin - pi = find_plugin('Amazon UK Kindle') - pi.affiliate = False + if 's-result-list-parent-container' in results.get('class', ''): + data_xpath = "descendant-or-self::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-result-item ')]" + format_xpath = './/a[contains(text(), "Kindle Edition")]//text()' + asin_xpath = '@data-asin' + cover_xpath = "descendant-or-self::img[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-image ')]/@src" + title_xpath = "descendant-or-self::h2[@class and contains(concat(' ', normalize-space(@class), ' '), ' s-access-title ')]//text()" + author_xpath = './/span[starts-with(text(), "by ")]/following-sibling::span//text()' + price_xpath = '(.//span[contains(@class, " s-price ")])[last()]//text()' + else: + return + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). Se we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. + asin = data.xpath(asin_xpath) + if asin: + asin = asin[0] + else: + continue + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath(title_xpath)) + author = ''.join(data.xpath(author_xpath)) + try: + author = author.split('by ', 1)[1].split(" (")[0] + except: + pass + + price = ''.join(data.xpath(price_xpath)) + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + + yield s + +class AmazonKindleStore(StorePlugin): def open(self, parent=None, detail_item=None, external=False): - pass + store_link = (DETAILS_URL + detail_item) if detail_item else STORE_LINK + open_url(QUrl(store_link)) def search(self, query, max_results=10, timeout=60): - s = SearchResult() - s.title = 'Amazon required that this
store be permanently closed.' - s.author = '' - s.price = '' - s.detail_item = '' - s.drm = SearchResult.DRM_UNKNOWN - yield s + for result in search_amazon(query, max_results=max_results, timeout=timeout): + yield result def get_details(self, search_result, timeout): - pass \ No newline at end of file + url = DETAILS_URL + + br = browser() + with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: + idata = html.fromstring(nf.read()) + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + + DRM_SEARCH_TEXT + '")])'): + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + + DRM_FREE_TEXT + '") and contains(b, "' + + DRM_SEARCH_TEXT + '")])'): + search_result.drm = SearchResult.DRM_UNLOCKED + else: + search_result.drm = SearchResult.DRM_UNKNOWN + else: + search_result.drm = SearchResult.DRM_LOCKED + return True + +if __name__ == '__main__': + import sys + for result in search_amazon(' '.join(sys.argv[1:]), write_html_to='/t/amazon.html'): + print (result)