Get Books: Fix ebooks.com. Fix cover display in Diesel ebooks store. Fix amazon.fr. Add amazon.es and amazon.it

2025-07-09 03:04:10 -04:00 · 2011-12-04 11:00:39 +05:30 · 2011-12-04 11:00:39 +05:30 · 46ba6f44f5
commit 46ba6f44f5
parent a45ea253c8 9c3af69651
7 changed files with 247 additions and 28 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1155,6 +1155,26 @@ class StoreAmazonFRKindleStore(StoreBase):
    formats = ['KINDLE']
    affiliate = True

+class StoreAmazonITKindleStore(StoreBase):
+    name = 'Amazon IT Kindle'
+    author = 'Charles Haley'
+    description = u'eBook Kindle a prezzi incredibili'
+    actual_plugin = 'calibre.gui2.store.stores.amazon_it_plugin:AmazonITKindleStore'
+
+    headquarters = 'IT'
+    formats = ['KINDLE']
+    affiliate = True
+
+class StoreAmazonESKindleStore(StoreBase):
+    name = 'Amazon ES Kindle'
+    author = 'Charles Haley'
+    description = u'eBook Kindle en España'
+    actual_plugin = 'calibre.gui2.store.stores.amazon_es_plugin:AmazonESKindleStore'
+
+    headquarters = 'ES'
+    formats = ['KINDLE']
+    affiliate = True
+
 class StoreAmazonUKKindleStore(StoreBase):
    name = 'Amazon UK Kindle'
    author = 'Charles Haley'
@ -1554,7 +1574,9 @@ plugins += [
    StoreArchiveOrgStore,
    StoreAmazonKindleStore,
    StoreAmazonDEKindleStore,
+    StoreAmazonESKindleStore,
    StoreAmazonFRKindleStore,
+    StoreAmazonITKindleStore,
    StoreAmazonUKKindleStore,
    StoreBaenWebScriptionStore,
    StoreBNStore,
@ -1564,7 +1586,7 @@ plugins += [
    StoreChitankaStore,
    StoreDieselEbooksStore,
    StoreEbookNLStore,
-	StoreEbookpointStore,
+    StoreEbookpointStore,
    StoreEbookscomStore,
    StoreEBookShoppeUKStore,
    StoreEHarlequinStore,
--- a/src/calibre/gui2/store/search/models.py
+++ b/src/calibre/gui2/store/search/models.py
@ -62,6 +62,7 @@ class Matches(QAbstractItemModel):
        # Only the showing matches.
        self.matches = []
        self.query = ''
+        self.filterable_query = False
        self.search_filter = SearchFilter()
        self.cover_pool = CoverThreadPool(cover_thread_count)
        self.details_pool = DetailsThreadPool(detail_thread_count)
@ -82,6 +83,7 @@ class Matches(QAbstractItemModel):
        self.all_matches = []
        self.search_filter.clear_search_results()
        self.query = ''
+        self.filterable_query = False
        self.cover_pool.abort()
        self.details_pool.abort()
        self.total_changed.emit(self.rowCount())
@ -113,7 +115,10 @@ class Matches(QAbstractItemModel):

    def filter_results(self):
        self.layoutAboutToBeChanged.emit()
-        if self.query:
+        # Only use the search filter's filtered results when there is a query
+        # and it is a filterable query. This allows the store's best-guess
+        # matches to come through.
+        if self.query and self.filterable_query:
            self.matches = list(self.search_filter.parse(self.query))
        else:
            self.matches = list(self.search_filter.universal_set())
@ -134,6 +139,35 @@ class Matches(QAbstractItemModel):

    def set_query(self, query):
        self.query = query
+        self.filterable_query = self.is_filterable_query(query)
+        
+    def is_filterable_query(self, query):
+        # Remove control modifiers.
+        query = query.replace('\\', '')
+        query = query.replace('!', '')
+        query = query.replace('=', '')
+        query = query.replace('~', '')
+        query = query.replace('>', '')
+        query = query.replace('<', '')
+        # Store the query at this point for comparison later
+        mod_query = query
+        # Remove filter identifiers
+        # Remove the prefix.
+        for loc in ('all', 'author', 'authors', 'title'):
+            query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
+            query = query.replace('%s:' % loc, '')
+        # Remove the prefix and search text.
+        for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
+            query = re.sub(r'%s:"[^"]"' % loc, '', query)
+            query = re.sub(r'%s:[^\s]*' % loc, '', query)
+        # Remove whitespace
+        query = re.sub('\s', '', query)
+        mod_query = re.sub('\s', '', mod_query)
+        # If mod_query and query are the same then there were no filter modifiers
+        # so this isn't a filterable query.
+        if mod_query == query:
+            return False
+        return True

    def index(self, row, column, parent=QModelIndex()):
        return self.createIndex(row, column)
--- a/src/calibre/gui2/store/stores/amazon_es_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py
@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from contextlib import closing
+
+from lxml import html
+
+from PyQt4.Qt import QUrl
+
+from calibre import browser
+from calibre.gui2 import open_url
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.search_result import SearchResult
+
+class AmazonESKindleStore(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    def open(self, parent=None, detail_item=None, external=False):
+        aff_id = {'tag': 'charhale09-21'}
+        store_link = 'http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790' % aff_id
+        if detail_item:
+            aff_id['asin'] = detail_item
+            store_link = 'http://www.amazon.es/gp/redirect.html?ie=UTF8&location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3626&creative=24790' % aff_id
+        open_url(QUrl(store_link))
+
+    def search(self, query, max_results=10, timeout=60):
+        search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        br = browser()
+
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+
+            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
+            format_xpath = './/span[@class="format"]/text()'
+            cover_xpath = './/img[@class="productImage"]/@src'
+
+            for data in doc.xpath(data_xpath):
+                if counter <= 0:
+                    break
+
+                # Even though we are searching digital-text only, Amazon will still
+                # put in results for non-Kindle books (author pages). So we need
+                # to explicitly check if the item is a Kindle book and ignore it
+                # if it isn't.
+                format = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format.lower():
+                    continue
+
+                # We must have an asin otherwise we can't easily reference the
+                # book later.
+                asin = ''.join(data.xpath("@name"))
+
+                cover_url = ''.join(data.xpath(cover_xpath))
+
+                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
+                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
+                if author.startswith('de '):
+                    author = author[3:]
+
+                counter -= 1
+
+                s = SearchResult()
+                s.cover_url = cover_url.strip()
+                s.title = title.strip()
+                s.author = author.strip()
+                s.price = price.strip()
+                s.detail_item = asin.strip()
+                s.formats = 'Kindle'
+                s.drm = SearchResult.DRM_UNKNOWN
+
+                yield s
--- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py
@ -38,7 +38,9 @@ class AmazonFRKindleStore(StorePlugin):

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # Apparently amazon.fr is responding in UTF-8 now
+            doc = html.fromstring(f.read())

            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
--- a/src/calibre/gui2/store/stores/amazon_it_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py
@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from contextlib import closing
+
+from lxml import html
+
+from PyQt4.Qt import QUrl
+
+from calibre import browser
+from calibre.gui2 import open_url
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.search_result import SearchResult
+
+class AmazonITKindleStore(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    def open(self, parent=None, detail_item=None, external=False):
+        aff_id = {'tag': 'httpcharles07-21'}
+        store_link = 'http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322' % aff_id
+        if detail_item:
+            aff_id['asin'] = detail_item
+            store_link = 'http://www.amazon.it/gp/redirect.html?ie=UTF8&location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3370&creative=23322' % aff_id
+        open_url(QUrl(store_link))
+
+    def search(self, query, max_results=10, timeout=60):
+        search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        br = browser()
+
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+
+            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
+            format_xpath = './/span[@class="format"]/text()'
+            cover_xpath = './/img[@class="productImage"]/@src'
+
+            for data in doc.xpath(data_xpath):
+                if counter <= 0:
+                    break
+
+                # Even though we are searching digital-text only, Amazon will still
+                # put in results for non-Kindle books (author pages). So we need
+                # to explicitly check if the item is a Kindle book and ignore it
+                # if it isn't.
+                format = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format.lower():
+                    continue
+
+                # We must have an asin otherwise we can't easily reference the
+                # book later.
+                asin = ''.join(data.xpath("@name"))
+
+                cover_url = ''.join(data.xpath(cover_xpath))
+
+                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
+                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
+                if author.startswith('di '):
+                    author = author[3:]
+
+                counter -= 1
+
+                s = SearchResult()
+                s.cover_url = cover_url.strip()
+                s.title = title.strip()
+                s.author = author.strip()
+                s.price = price.strip()
+                s.detail_item = asin.strip()
+                s.formats = 'Kindle'
+                s.drm = SearchResult.DRM_UNKNOWN
+
+                yield s
--- a/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py
+++ b/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py
@ -63,9 +63,6 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
                a, b, id = id.partition('/item/')

                cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src'))
-                if cover_url.startswith('/'):
-                    cover_url = cover_url[1:]
-                cover_url = 'http://www.diesel-ebooks.com/' + cover_url

                title = ''.join(data.xpath('.//div[@class="content"]//h2/text()'))
                author = ''.join(data.xpath('//div[@class="content"]//div[@class="author"]/a/text()'))
--- a/src/calibre/gui2/store/stores/ebooks_com_plugin.py
+++ b/src/calibre/gui2/store/stores/ebooks_com_plugin.py
@ -54,7 +54,7 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@class="book_a" or @class="book_b"]'):
+            for data in doc.xpath('//div[@id="results"]//li'):
                if counter <= 0:
                    break

@ -64,15 +64,21 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
                    continue
                id = mo.group()
                
-                cover_url = ''.join(data.xpath('.//img[1]/@src'))
+                cover_url = ''
+                cover_load = ''.join(data.xpath('.//div[@class="img"]//img/@onload'))
+                mo = re.search('(?<=\').+?(?=\')', cover_load)
+                if mo:
+                    cover_url = mo.group();
                
                title = ''
                author = ''
-                heading_a = data.xpath('.//a[1]/text()')
-                if heading_a:
-                    title = heading_a[0]
-                if len(heading_a) >= 2:
-                    author = heading_a[1]
+                header_parts = data.xpath('.//div[@class="descr"]/h4//a//text()')
+                if header_parts:
+                    title = header_parts[0]
+                    header_parts = header_parts[1:]
+                if header_parts:
+                    author = ', '.join(header_parts)
+                

                counter -= 1
                
@ -98,22 +104,18 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
        with closing(br.open(url + id, timeout=timeout)) as nf:
            pdoc = html.fromstring(nf.read())
            
-            pdata = pdoc.xpath('//table[@class="price"]/tr/td/text()')
-            if len(pdata) >= 2:
-                price = pdata[1]
+            price_l = pdoc.xpath('//span[@class="price"]/text()')
+            if price_l:
+                price = price_l[0]
+            search_result.price = price.strip()
            
            search_result.drm = SearchResult.DRM_UNLOCKED
-            for sec in ('Printing', 'Copying', 'Lending'):
-                if pdoc.xpath('boolean(//div[@class="formatTableInner"]//table//tr[contains(th, "%s") and contains(td, "Off")])' % sec):
-                    search_result.drm = SearchResult.DRM_LOCKED
-                    break
+            permissions = ' '.join(pdoc.xpath('//div[@class="permissions-items"]//text()'))
+            if 'off' in permissions:
+                search_result.drm = SearchResult.DRM_LOCKED
            
-            fdata = ', '.join(pdoc.xpath('//table[@class="price"]//tr//td[1]/text()'))
-            fdata = fdata.replace(':', '')
-            fdata = re.sub(r'\s{2,}', ' ', fdata)
-            fdata = fdata.replace(' ,', ',')
-            fdata = fdata.strip()
-            search_result.formats = fdata
-        
-        search_result.price = price.strip()
+            fdata = pdoc.xpath('//div[contains(@class, "more-links") and contains(@class, "more-links-info")]/div//span/text()')
+            if len(fdata) > 1:
+                search_result.formats = ', '.join(fdata[1:])
+
        return True