Get Books: Fix ebooks.com. Fix cover display in Diesel ebooks store. Fix amazon.fr. Add amazon.es and amazon.it

2025-07-09 03:04:10 -04:00 · 2011-12-04 11:00:39 +05:30 · 2011-12-04 11:00:39 +05:30 · 46ba6f44f5
commit 46ba6f44f5
parent a45ea253c8 9c3af69651
7 changed files with 247 additions and 28 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1155,6 +1155,26 @@ class StoreAmazonFRKindleStore(StoreBase):
    formats = ['KINDLE']
    affiliate = True
 class StoreAmazonITKindleStore(StoreBase):
    # Registration record for the amazon.it Kindle store.
    # actual_plugin is a 'module.path:ClassName' reference to the class
    # that implements the store.
    actual_plugin = 'calibre.gui2.store.stores.amazon_it_plugin:AmazonITKindleStore'
    name = 'Amazon IT Kindle'
    description = u'eBook Kindle a prezzi incredibili'
    author = 'Charles Haley'
    # Presumably the country code of the store's home market -- confirm
    # against StoreBase.
    headquarters = 'IT'
    formats = ['KINDLE']
    affiliate = True
 class StoreAmazonESKindleStore(StoreBase):
    # Registration record for the amazon.es Kindle store.
    # actual_plugin is a 'module.path:ClassName' reference to the class
    # that implements the store.
    actual_plugin = 'calibre.gui2.store.stores.amazon_es_plugin:AmazonESKindleStore'
    name = 'Amazon ES Kindle'
    description = u'eBook Kindle en España'
    author = 'Charles Haley'
    # Presumably the country code of the store's home market -- confirm
    # against StoreBase.
    headquarters = 'ES'
    formats = ['KINDLE']
    affiliate = True
 class StoreAmazonUKKindleStore(StoreBase):
    name = 'Amazon UK Kindle'
    author = 'Charles Haley'
@ -1554,7 +1574,9 @@ plugins += [
    StoreArchiveOrgStore,
    StoreAmazonKindleStore,
    StoreAmazonDEKindleStore,
    StoreAmazonESKindleStore,
    StoreAmazonFRKindleStore,
    StoreAmazonITKindleStore,
    StoreAmazonUKKindleStore,
    StoreBaenWebScriptionStore,
    StoreBNStore,
@ -1564,7 +1586,7 @@ plugins += [
    StoreChitankaStore,
    StoreDieselEbooksStore,
    StoreEbookNLStore,
-	StoreEbookpointStore,
+    StoreEbookpointStore,
    StoreEbookscomStore,
    StoreEBookShoppeUKStore,
    StoreEHarlequinStore,
--- a/src/calibre/gui2/store/search/models.py
+++ b/src/calibre/gui2/store/search/models.py
@ -62,6 +62,7 @@ class Matches(QAbstractItemModel):
        # Only the showing matches.
        self.matches = []
        self.query = ''
        self.filterable_query = False
        self.search_filter = SearchFilter()
        self.cover_pool = CoverThreadPool(cover_thread_count)
        self.details_pool = DetailsThreadPool(detail_thread_count)
@ -82,6 +83,7 @@ class Matches(QAbstractItemModel):
        self.all_matches = []
        self.search_filter.clear_search_results()
        self.query = ''
        self.filterable_query = False
        self.cover_pool.abort()
        self.details_pool.abort()
        self.total_changed.emit(self.rowCount())
@ -113,7 +115,10 @@ class Matches(QAbstractItemModel):
    def filter_results(self):
        self.layoutAboutToBeChanged.emit()
-        if self.query:
+        # Only use the search filter's filtered results when there is a query
        # and it is a filterable query. This allows the store's best-guess
        # matches to come through.
        if self.query and self.filterable_query:
            self.matches = list(self.search_filter.parse(self.query))
        else:
            self.matches = list(self.search_filter.universal_set())
@ -134,6 +139,35 @@ class Matches(QAbstractItemModel):
    def set_query(self, query):
        '''
        Remember the current search query.

        Stores the raw text on ``self.query`` and caches on
        ``self.filterable_query`` whether ``is_filterable_query`` considers
        it a query that uses search-location prefixes.
        '''
        self.query = query
        uses_filters = self.is_filterable_query(query)
        self.filterable_query = uses_filters
    def is_filterable_query(self, query):
        '''
        Report whether ``query`` contains any search-location prefix.

        The query is stripped of control modifiers, then every recognised
        ``location:`` prefix (and, for non-text locations, its value) is
        removed. If that removal changed the text, the query used at least
        one prefix and is considered filterable.

        :param query: The search text as typed by the user.
        :return: True if the query contains at least one location prefix,
                 False otherwise (including for an empty query).
        '''
        # Remove control modifiers.
        for modifier in ('\\', '!', '=', '~', '>', '<'):
            query = query.replace(modifier, '')
        # Keep the modifier-free query for comparison later.
        mod_query = query
        # Text locations: drop only the prefix and keep the search text
        # (unwrapping a quoted single-word value).
        for loc in ('all', 'author', 'authors', 'title'):
            query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, r'\g<a>', query)
            query = query.replace('%s:' % loc, '')
        # Non-text locations: drop the prefix together with its value.
        for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
            # Quoted values may hold any number of characters, including
            # spaces, so remove everything up to the closing quote.
            query = re.sub(r'%s:"[^"]*"' % loc, '', query)
            query = re.sub(r'%s:[^\s]*' % loc, '', query)
        # Whitespace is irrelevant to the comparison.
        query = re.sub(r'\s', '', query)
        mod_query = re.sub(r'\s', '', mod_query)
        # Identical text means nothing was stripped, i.e. there were no
        # filter prefixes in the query.
        return mod_query != query
    def index(self, row, column, parent=QModelIndex()):
        '''
        Return the model index for the given row and column.

        ``parent`` is accepted but not used by this implementation.
        '''
        model_index = self.createIndex(row, column)
        return model_index
--- a/src/calibre/gui2/store/stores/amazon_es_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py
@ -0,0 +1,81 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from contextlib import closing
 from lxml import html
 from PyQt4.Qt import QUrl
 from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult
 class AmazonESKindleStore(StorePlugin):
    '''
    Store plugin for the amazon.es Kindle store.

    For comments on the implementation, please see amazon_plugin.py
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Build the affiliate-tagged store URL and hand it to ``open_url``.

        :param parent: Not used by this implementation.
        :param detail_item: ASIN of a book. When given, the product page is
                            opened; otherwise the Kindle store front page.
        :param external: Not used by this implementation.
        '''
        params = {'tag': 'charhale09-21'}
        if detail_item:
            params['asin'] = detail_item
            template = 'http://www.amazon.es/gp/redirect.html?ie=UTF8&location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3626&creative=24790'
        else:
            template = 'http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790'
        open_url(QUrl(template % params))

    def search(self, query, max_results=10, timeout=60):
        '''
        Search amazon.es and yield a ``SearchResult`` per Kindle book found.

        :param query: The search text.
        :param max_results: Stop after yielding this many results.
        :param timeout: Passed through to the browser's ``open`` call.
        '''
        base_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
        # backslashreplace turns non-ASCII characters into \xNN escapes,
        # which are then rewritten as %NN. Literal '%' is escaped first so
        # it cannot be confused with those.
        # NOTE(review): characters above U+00FF come out as \uNNNN and
        # reserved characters such as '&' are passed through untouched.
        escaped = query.encode('ascii', 'backslashreplace')
        escaped = escaped.replace('%', '%25')
        escaped = escaped.replace('\\x', '%')
        escaped = escaped.replace(' ', '+')
        url = base_url + escaped
        br = browser()
        yielded = 0
        with closing(br.open(url, timeout=timeout)) as f:
            # NOTE(review): the amazon.fr plugin stopped decoding as latin-1
            # because that site now answers in UTF-8 -- confirm latin-1 is
            # still correct for amazon.es.
            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            for data in doc.xpath('//div[contains(@class, "result") and contains(@class, "product")]'):
                if yielded >= max_results:
                    break
                # A digital-text search can still return non-Kindle entries
                # (author pages), so anything not labelled Kindle is skipped.
                kind = ''.join(data.xpath('.//span[@class="format"]/text()'))
                if 'kindle' not in kind.lower():
                    continue
                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                # Drop the leading "de " (Spanish "by") from the author line.
                if author.startswith('de '):
                    author = author[3:]
                yielded += 1
                result = SearchResult()
                result.cover_url = ''.join(data.xpath('.//img[@class="productImage"]/@src')).strip()
                result.title = ''.join(data.xpath('.//div[@class="title"]/a/text()')).strip()
                result.author = author.strip()
                result.price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')).strip()
                # The ASIN comes from the result div's name attribute; open()
                # uses it to build the product link.
                result.detail_item = ''.join(data.xpath("@name")).strip()
                result.formats = 'Kindle'
                result.drm = SearchResult.DRM_UNKNOWN
                yield result
--- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py
@ -38,7 +38,9 @@ class AmazonFRKindleStore(StorePlugin):
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            # Apparently amazon.fr is responding in UTF-8 now
            doc = html.fromstring(f.read())
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
--- a/src/calibre/gui2/store/stores/amazon_it_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py
@ -0,0 +1,81 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from contextlib import closing
 from lxml import html
 from PyQt4.Qt import QUrl
 from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult
 class AmazonITKindleStore(StorePlugin):
    '''
    Store plugin for the amazon.it Kindle store.

    For comments on the implementation, please see amazon_plugin.py
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Build the affiliate-tagged store URL and hand it to ``open_url``.

        :param parent: Not used by this implementation.
        :param detail_item: ASIN of a book. When given, the product page is
                            opened; otherwise the Kindle store front page.
        :param external: Not used by this implementation.
        '''
        params = {'tag': 'httpcharles07-21'}
        if detail_item:
            params['asin'] = detail_item
            template = 'http://www.amazon.it/gp/redirect.html?ie=UTF8&location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3370&creative=23322'
        else:
            template = 'http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322'
        open_url(QUrl(template % params))

    def search(self, query, max_results=10, timeout=60):
        '''
        Search amazon.it and yield a ``SearchResult`` per Kindle book found.

        :param query: The search text.
        :param max_results: Stop after yielding this many results.
        :param timeout: Passed through to the browser's ``open`` call.
        '''
        base_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
        # backslashreplace turns non-ASCII characters into \xNN escapes,
        # which are then rewritten as %NN. Literal '%' is escaped first so
        # it cannot be confused with those.
        # NOTE(review): characters above U+00FF come out as \uNNNN and
        # reserved characters such as '&' are passed through untouched.
        escaped = query.encode('ascii', 'backslashreplace')
        escaped = escaped.replace('%', '%25')
        escaped = escaped.replace('\\x', '%')
        escaped = escaped.replace(' ', '+')
        url = base_url + escaped
        br = browser()
        yielded = 0
        with closing(br.open(url, timeout=timeout)) as f:
            # NOTE(review): the amazon.fr plugin stopped decoding as latin-1
            # because that site now answers in UTF-8 -- confirm latin-1 is
            # still correct for amazon.it.
            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            for data in doc.xpath('//div[contains(@class, "result") and contains(@class, "product")]'):
                if yielded >= max_results:
                    break
                # A digital-text search can still return non-Kindle entries
                # (author pages), so anything not labelled Kindle is skipped.
                kind = ''.join(data.xpath('.//span[@class="format"]/text()'))
                if 'kindle' not in kind.lower():
                    continue
                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                # Drop the leading "di " (Italian "by") from the author line.
                if author.startswith('di '):
                    author = author[3:]
                yielded += 1
                result = SearchResult()
                result.cover_url = ''.join(data.xpath('.//img[@class="productImage"]/@src')).strip()
                result.title = ''.join(data.xpath('.//div[@class="title"]/a/text()')).strip()
                result.author = author.strip()
                result.price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')).strip()
                # The ASIN comes from the result div's name attribute; open()
                # uses it to build the product link.
                result.detail_item = ''.join(data.xpath("@name")).strip()
                result.formats = 'Kindle'
                result.drm = SearchResult.DRM_UNKNOWN
                yield result
--- a/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py
+++ b/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py
@ -63,9 +63,6 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
                a, b, id = id.partition('/item/')
                cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src'))
                if cover_url.startswith('/'):
                    cover_url = cover_url[1:]
                cover_url = 'http://www.diesel-ebooks.com/' + cover_url
                title = ''.join(data.xpath('.//div[@class="content"]//h2/text()'))
                author = ''.join(data.xpath('//div[@class="content"]//div[@class="author"]/a/text()'))
--- a/src/calibre/gui2/store/stores/ebooks_com_plugin.py
+++ b/src/calibre/gui2/store/stores/ebooks_com_plugin.py
@ -54,7 +54,7 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@class="book_a" or @class="book_b"]'):
+            for data in doc.xpath('//div[@id="results"]//li'):
                if counter <= 0:
                    break
@ -64,15 +64,21 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
                    continue
                id = mo.group()
-                cover_url = ''.join(data.xpath('.//img[1]/@src'))
+                cover_url = ''
                cover_load = ''.join(data.xpath('.//div[@class="img"]//img/@onload'))
                mo = re.search('(?<=\').+?(?=\')', cover_load)
                if mo:
                    cover_url = mo.group();
                title = ''
                author = ''
-                heading_a = data.xpath('.//a[1]/text()')
+                header_parts = data.xpath('.//div[@class="descr"]/h4//a//text()')
-                if heading_a:
+                if header_parts:
-                    title = heading_a[0]
+                    title = header_parts[0]
-                if len(heading_a) >= 2:
+                    header_parts = header_parts[1:]
-                    author = heading_a[1]
+                if header_parts:
                    author = ', '.join(header_parts)
                counter -= 1
@ -98,22 +104,18 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
        with closing(br.open(url + id, timeout=timeout)) as nf:
            pdoc = html.fromstring(nf.read())
-            pdata = pdoc.xpath('//table[@class="price"]/tr/td/text()')
+            price_l = pdoc.xpath('//span[@class="price"]/text()')
-            if len(pdata) >= 2:
+            if price_l:
-                price = pdata[1]
+                price = price_l[0]
            search_result.price = price.strip()
            search_result.drm = SearchResult.DRM_UNLOCKED
-            for sec in ('Printing', 'Copying', 'Lending'):
+            permissions = ' '.join(pdoc.xpath('//div[@class="permissions-items"]//text()'))
-                if pdoc.xpath('boolean(//div[@class="formatTableInner"]//table//tr[contains(th, "%s") and contains(td, "Off")])' % sec):
+            if 'off' in permissions:
-                    search_result.drm = SearchResult.DRM_LOCKED
+                search_result.drm = SearchResult.DRM_LOCKED
                    break
-            fdata = ', '.join(pdoc.xpath('//table[@class="price"]//tr//td[1]/text()'))
+            fdata = pdoc.xpath('//div[contains(@class, "more-links") and contains(@class, "more-links-info")]/div//span/text()')
-            fdata = fdata.replace(':', '')
+            if len(fdata) > 1:
-            fdata = re.sub(r'\s{2,}', ' ', fdata)
+                search_result.formats = ', '.join(fdata[1:])
-            fdata = fdata.replace(' ,', ',')
+
            fdata = fdata.strip()
            search_result.formats = fdata
        search_result.price = price.strip()
        return True