From 1ab8aeb4b9805b66cb517f9dc2d5c7d7caa64d8c Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 3 Dec 2011 11:20:15 +0100 Subject: [PATCH 1/4] Add amazon.es and amazon.it. Fix amazon.fr apparently responding in UTF-8 now. --- src/calibre/customize/builtins.py | 24 +++++- .../gui2/store/stores/amazon_es_plugin.py | 81 +++++++++++++++++++ .../gui2/store/stores/amazon_fr_plugin.py | 4 +- .../gui2/store/stores/amazon_it_plugin.py | 81 +++++++++++++++++++ 4 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 src/calibre/gui2/store/stores/amazon_es_plugin.py create mode 100644 src/calibre/gui2/store/stores/amazon_it_plugin.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 6258434b1d..e576cf84ad 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1155,6 +1155,26 @@ class StoreAmazonFRKindleStore(StoreBase): formats = ['KINDLE'] affiliate = True +class StoreAmazonITKindleStore(StoreBase): + name = 'Amazon IT Kindle' + author = 'Charles Haley' + description = u'eBook Kindle a prezzi incredibili' + actual_plugin = 'calibre.gui2.store.stores.amazon_it_plugin:AmazonITKindleStore' + + headquarters = 'IT' + formats = ['KINDLE'] + affiliate = True + +class StoreAmazonESKindleStore(StoreBase): + name = 'Amazon ES Kindle' + author = 'Charles Haley' + description = u'eBook Kindle en EspaƱa' + actual_plugin = 'calibre.gui2.store.stores.amazon_es_plugin:AmazonESKindleStore' + + headquarters = 'ES' + formats = ['KINDLE'] + affiliate = True + class StoreAmazonUKKindleStore(StoreBase): name = 'Amazon UK Kindle' author = 'Charles Haley' @@ -1554,7 +1574,9 @@ plugins += [ StoreArchiveOrgStore, StoreAmazonKindleStore, StoreAmazonDEKindleStore, + StoreAmazonESKindleStore, StoreAmazonFRKindleStore, + StoreAmazonITKindleStore, StoreAmazonUKKindleStore, StoreBaenWebScriptionStore, StoreBNStore, @@ -1564,7 +1586,7 @@ plugins += [ StoreChitankaStore, StoreDieselEbooksStore, StoreEbookNLStore, 
- StoreEbookpointStore, + StoreEbookpointStore, StoreEbookscomStore, StoreEBookShoppeUKStore, StoreEHarlequinStore, diff --git a/src/calibre/gui2/store/stores/amazon_es_plugin.py b/src/calibre/gui2/store/stores/amazon_es_plugin.py new file mode 100644 index 0000000000..d89c051d87 --- /dev/null +++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +from __future__ import (unicode_literals, division, absolute_import, print_function) + +__license__ = 'GPL 3' +__copyright__ = '2011, John Schember ' +__docformat__ = 'restructuredtext en' + +from contextlib import closing + +from lxml import html + +from PyQt4.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url +from calibre.gui2.store import StorePlugin +from calibre.gui2.store.search_result import SearchResult + +class AmazonESKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' + + def open(self, parent=None, detail_item=None, external=False): + aff_id = {'tag': 'charhale09-21'} + store_link = 'http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790' % aff_id + if detail_item: + aff_id['asin'] = detail_item + store_link = 'http://www.amazon.es/gp/redirect.html?ie=UTF8&location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3626&creative=24790' % aff_id + open_url(QUrl(store_link)) + + def search(self, query, max_results=10, timeout=60): + search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords=' + url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+') + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + doc = html.fromstring(f.read().decode('latin-1', 'replace')) + + data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]' + format_xpath = 
'.//span[@class="format"]/text()' + cover_xpath = './/img[@class="productImage"]/@src' + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). So we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. + asin = ''.join(data.xpath("@name")) + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) + price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')) + author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))) + if author.startswith('de '): + author = author[3:] + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + s.drm = SearchResult.DRM_UNKNOWN + + yield s diff --git a/src/calibre/gui2/store/stores/amazon_fr_plugin.py b/src/calibre/gui2/store/stores/amazon_fr_plugin.py index ca36f1055b..ea4c80e50d 100644 --- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py @@ -38,7 +38,9 @@ class AmazonFRKindleStore(StorePlugin): counter = max_results with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read().decode('latin-1', 'replace')) + # doc = html.fromstring(f.read().decode('latin-1', 'replace')) + # Apparently amazon.fr is responding in UTF-8 now + doc = html.fromstring(f.read()) data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]' format_xpath = './/span[@class="format"]/text()' diff --git a/src/calibre/gui2/store/stores/amazon_it_plugin.py 
b/src/calibre/gui2/store/stores/amazon_it_plugin.py new file mode 100644 index 0000000000..c62273deeb --- /dev/null +++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +from __future__ import (unicode_literals, division, absolute_import, print_function) + +__license__ = 'GPL 3' +__copyright__ = '2011, John Schember ' +__docformat__ = 'restructuredtext en' + +from contextlib import closing + +from lxml import html + +from PyQt4.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url +from calibre.gui2.store import StorePlugin +from calibre.gui2.store.search_result import SearchResult + +class AmazonITKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' + + def open(self, parent=None, detail_item=None, external=False): + aff_id = {'tag': 'httpcharles07-21'} + store_link = 'http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322' % aff_id + if detail_item: + aff_id['asin'] = detail_item + store_link = 'http://www.amazon.it/gp/redirect.html?ie=UTF8&location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3370&creative=23322' % aff_id + open_url(QUrl(store_link)) + + def search(self, query, max_results=10, timeout=60): + search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords=' + url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+') + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + doc = html.fromstring(f.read().decode('latin-1', 'replace')) + + data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]' + format_xpath = './/span[@class="format"]/text()' + cover_xpath = './/img[@class="productImage"]/@src' + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching 
digital-text only Amazon will still + # put in results for non Kindle books (author pages). So we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. + asin = ''.join(data.xpath("@name")) + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) + price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')) + author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))) + if author.startswith('di '): + author = author[3:] + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + s.drm = SearchResult.DRM_UNKNOWN + + yield s From ea5b62a02356589a5e94868f6836279b88806580 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 3 Dec 2011 12:12:43 -0500 Subject: [PATCH 2/4] Store: Fix displaying covers in Diesel store. 
--- src/calibre/gui2/store/stores/diesel_ebooks_plugin.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py b/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py index a21d6943d7..a6876f8840 100644 --- a/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py +++ b/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py @@ -63,9 +63,6 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin): a, b, id = id.partition('/item/') cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src')) - if cover_url.startswith('/'): - cover_url = cover_url[1:] - cover_url = 'http://www.diesel-ebooks.com/' + cover_url title = ''.join(data.xpath('.//div[@class="content"]//h2/text()')) author = ''.join(data.xpath('//div[@class="content"]//div[@class="author"]/a/text()')) From 6595a3d01c99cf90ca8b42089863d05f4eab815b Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 3 Dec 2011 12:48:05 -0500 Subject: [PATCH 3/4] Store: Fix ebooks.com store plugin. 
--- .../gui2/store/stores/ebooks_com_plugin.py | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/calibre/gui2/store/stores/ebooks_com_plugin.py b/src/calibre/gui2/store/stores/ebooks_com_plugin.py index 341d08abac..99510005f0 100644 --- a/src/calibre/gui2/store/stores/ebooks_com_plugin.py +++ b/src/calibre/gui2/store/stores/ebooks_com_plugin.py @@ -54,7 +54,7 @@ class EbookscomStore(BasicStoreConfig, StorePlugin): counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@class="book_a" or @class="book_b"]'): + for data in doc.xpath('//div[@id="results"]//li'): if counter <= 0: break @@ -64,15 +64,21 @@ class EbookscomStore(BasicStoreConfig, StorePlugin): continue id = mo.group() - cover_url = ''.join(data.xpath('.//img[1]/@src')) + cover_url = '' + cover_load = ''.join(data.xpath('.//div[@class="img"]//img/@onload')) + mo = re.search('(?<=\').+?(?=\')', cover_load) + if mo: + cover_url = mo.group(); title = '' author = '' - heading_a = data.xpath('.//a[1]/text()') - if heading_a: - title = heading_a[0] - if len(heading_a) >= 2: - author = heading_a[1] + header_parts = data.xpath('.//div[@class="descr"]/h4//a//text()') + if header_parts: + title = header_parts[0] + header_parts = header_parts[1:] + if header_parts: + author = ', '.join(header_parts) + counter -= 1 @@ -98,22 +104,18 @@ class EbookscomStore(BasicStoreConfig, StorePlugin): with closing(br.open(url + id, timeout=timeout)) as nf: pdoc = html.fromstring(nf.read()) - pdata = pdoc.xpath('//table[@class="price"]/tr/td/text()') - if len(pdata) >= 2: - price = pdata[1] + price_l = pdoc.xpath('//span[@class="price"]/text()') + if price_l: + price = price_l[0] + search_result.price = price.strip() search_result.drm = SearchResult.DRM_UNLOCKED - for sec in ('Printing', 'Copying', 'Lending'): - if pdoc.xpath('boolean(//div[@class="formatTableInner"]//table//tr[contains(th, "%s") and contains(td, 
"Off")])' % sec): - search_result.drm = SearchResult.DRM_LOCKED - break + permissions = ' '.join(pdoc.xpath('//div[@class="permissions-items"]//text()')) + if 'off' in permissions: + search_result.drm = SearchResult.DRM_LOCKED - fdata = ', '.join(pdoc.xpath('//table[@class="price"]//tr//td[1]/text()')) - fdata = fdata.replace(':', '') - fdata = re.sub(r'\s{2,}', ' ', fdata) - fdata = fdata.replace(' ,', ',') - fdata = fdata.strip() - search_result.formats = fdata - - search_result.price = price.strip() + fdata = pdoc.xpath('//div[contains(@class, "more-links") and contains(@class, "more-links-info")]/div//span/text()') + if len(fdata) > 1: + search_result.formats = ', '.join(fdata[1:]) + return True From 9c3af6965160c55e1d1c37419abbbe85accbe2c3 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 3 Dec 2011 13:08:39 -0500 Subject: [PATCH 4/4] Store: Search, Only use the search filter's filtered results when there is a query and it is a filterable query. This allows for the stores best guess matches to come though. --- src/calibre/gui2/store/search/models.py | 36 ++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/store/search/models.py b/src/calibre/gui2/store/search/models.py index e6ef147861..975601c603 100644 --- a/src/calibre/gui2/store/search/models.py +++ b/src/calibre/gui2/store/search/models.py @@ -62,6 +62,7 @@ class Matches(QAbstractItemModel): # Only the showing matches. 
self.matches = [] self.query = '' + self.filterable_query = False self.search_filter = SearchFilter() self.cover_pool = CoverThreadPool(cover_thread_count) self.details_pool = DetailsThreadPool(detail_thread_count) @@ -82,6 +83,7 @@ class Matches(QAbstractItemModel): self.all_matches = [] self.search_filter.clear_search_results() self.query = '' + self.filterable_query = False self.cover_pool.abort() self.details_pool.abort() self.total_changed.emit(self.rowCount()) @@ -113,7 +115,10 @@ class Matches(QAbstractItemModel): def filter_results(self): self.layoutAboutToBeChanged.emit() - if self.query: + # Only use the search filter's filtered results when there is a query + # and it is a filterable query. This allows for the store's best guess + # matches to come through. + if self.query and self.filterable_query: self.matches = list(self.search_filter.parse(self.query)) else: self.matches = list(self.search_filter.universal_set()) @@ -134,6 +139,35 @@ class Matches(QAbstractItemModel): def set_query(self, query): self.query = query + self.filterable_query = self.is_filterable_query(query) + + def is_filterable_query(self, query): + # Remove control modifiers. + query = query.replace('\\', '') + query = query.replace('!', '') + query = query.replace('=', '') + query = query.replace('~', '') + query = query.replace('>', '') + query = query.replace('<', '') + # Store the query at this point for comparison later + mod_query = query + # Remove filter identifiers + # Remove the prefix. + for loc in ('all', 'author', 'authors', 'title'): + query = re.sub(r'%s:"(?P[^\s"]+)"' % loc, '\g', query) + query = query.replace('%s:' % loc, '') + # Remove the prefix and search text. 
+ for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'): + query = re.sub(r'%s:"[^"]"' % loc, '', query) + query = re.sub(r'%s:[^\s]*' % loc, '', query) + # Remove whitespace + query = re.sub('\s', '', query) + mod_query = re.sub('\s', '', mod_query) + # If mod_query and query are the same then there were no filter modifiers + # so this isn't a filterable query. + if mod_query == query: + return False + return True def index(self, row, column, parent=QModelIndex()): return self.createIndex(row, column)