Get Books: Update plugins for Amazon and B&N stores to handle website changes. Enable some stores by default on first run. Add Zixo store

2025-07-09 03:04:10 -04:00 · 2011-06-06 17:34:14 -06:00 · 2011-06-06 17:34:14 -06:00 · c54ee1d6a8
commit c54ee1d6a8
parent 7f7d900fef fff3cb147a
9 changed files with 238 additions and 45 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1418,6 +1418,15 @@ class StoreWoblinkStore(StoreBase):
    headquarters = 'PL'
    formats = ['EPUB']
 class StoreZixoStore(StoreBase):
    # Registration metadata for the Zixo store; the implementation is
    # loaded from the module path given in actual_plugin.
    name = 'Zixo'
    author = u'Tomasz Długosz'
    description = u'Księgarnia z ebookami oraz książkami audio. Aby otwierać książki w formacie Zixo należy zainstalować program dostępny na stronie księgarni. Umożliwia on m.in. dodawanie zakładek i dostosowywanie rozmiaru czcionki.'
    actual_plugin = 'calibre.gui2.store.zixo_plugin:ZixoStore'
    headquarters = 'PL'
    # Each supported format is its own list entry.
    formats = ['PDF', 'ZIXO']
 plugins += [
    StoreArchiveOrgStore,
    StoreAmazonKindleStore,
@ -1452,7 +1461,8 @@ plugins += [
    StoreWeightlessBooksStore,
    StoreWHSmithUKStore,
    StoreWizardsTowerBooksStore,
-    StoreWoblinkStore
+    StoreWoblinkStore,
    StoreZixoStore
 ]
 # }}}
--- a/src/calibre/gui2/store/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/amazon_de_plugin.py
@ -6,21 +6,23 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import re, urllib
 from contextlib import closing
 from lxml import html
 from PyQt4.Qt import QUrl
 from calibre import browser
 from calibre.gui2 import open_url
-from calibre.gui2.store.amazon_plugin import AmazonKindleStore
+from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult
-class AmazonDEKindleStore(AmazonKindleStore):
+class AmazonDEKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
    details_url = 'http://amazon.de/dp/'
    drm_search_text = u'Gleichzeitige Verwendung von Geräten'
    drm_free_text = u'Keine Einschränkung'
    def open(self, parent=None, detail_item=None, external=False):
        aff_id = {'tag': 'charhale0a-21'}
        store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
@ -32,3 +34,94 @@ class AmazonDEKindleStore(AmazonKindleStore):
                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') % aff_id
        open_url(QUrl(store_link))
    def search(self, query, max_results=10, timeout=60):
        '''
        Query the Amazon.de Kindle store and yield one SearchResult per book.

        :param query: Search text; it is URL-quoted and appended to the
            search URL.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout handed to the browser when opening the
            results page.
        '''
        search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
        url = search_url + urllib.quote_plus(query)
        br = browser()

        remaining = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

            # Amazon serves two different result layouts; the presence of
            # the "shotgunMainResults" div tells them apart.
            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
            if is_shot:
                # Horizontal grid of books.
                data_xpath = '//div[contains(@class, "result")]'
                format_xpath = './/div[@class="productTitle"]/text()'
                cover_xpath = './/div[@class="productTitle"]//img/@src'
            else:
                # Vertical list of books.
                data_xpath = '//div[@class="productData"]'
                format_xpath = './/span[@class="format"]/text()'
                cover_xpath = '../div[@class="productImage"]/a/img/@src'

            for data in doc.xpath(data_xpath):
                if remaining <= 0:
                    break

                # Even when searching digital-text only, Amazon still mixes
                # in entries that are not Kindle books (author pages), so
                # anything not explicitly marked as Kindle is ignored.
                binding = ''.join(data.xpath(format_xpath))
                if 'kindle' not in binding.lower():
                    continue

                # Without an ASIN the book cannot be referenced later, so
                # entries lacking one are dropped.
                links = data.xpath('.//div[@class="productTitle"]/a[1]')
                if not links:
                    continue
                match = re.search(r'/dp/(?P<asin>.+?)(/|$)', links[0].get('href', ''))
                if match is None:
                    continue

                # In the grid layout the author is part of the title text;
                # in the list layout it sits in its own "ptBrand" span.
                if is_shot:
                    byline = binding
                else:
                    byline = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))

                remaining -= 1

                s = SearchResult()
                s.cover_url = ''.join(data.xpath(cover_xpath)).strip()
                s.title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()')).strip()
                s.author = byline.split(' von ')[-1].strip()
                s.price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')).strip()
                s.detail_item = match.group('asin').strip()
                s.formats = 'Kindle'

                yield s
    def get_details(self, search_result, timeout):
        '''
        Fetch the book's Amazon.de page and set search_result.drm from it.

        :param search_result: Result whose detail_item is appended to the
            details URL; its drm attribute is updated in place.
        :param timeout: Timeout handed to the browser when opening the page.
        :return: Always True.
        '''
        drm_search_text = u'Gleichzeitige Verwendung von Geräten'
        drm_free_text = u'Keine Einschränkung'

        # A list item whose bold label contains the device-usage text.
        has_usage_entry = ('boolean(//div[@class="content"]//li/b[contains(text(), "' +
                           drm_search_text + '")])')
        # The same list item, additionally containing the "no restriction" text.
        usage_is_unrestricted = ('boolean(//div[@class="content"]//li[contains(., "' +
                                 drm_free_text + '") and contains(b, "' +
                                 drm_search_text + '")])')

        br = browser()
        page_url = 'http://amazon.de/dp/' + search_result.detail_item
        with closing(br.open(page_url, timeout=timeout)) as nf:
            idata = html.fromstring(nf.read())
            if not idata.xpath(has_usage_entry):
                # No device-usage entry on the page at all.
                search_result.drm = SearchResult.DRM_LOCKED
            elif idata.xpath(usage_is_unrestricted):
                search_result.drm = SearchResult.DRM_UNLOCKED
            else:
                search_result.drm = SearchResult.DRM_UNKNOWN
        return True
--- a/src/calibre/gui2/store/amazon_plugin.py
+++ b/src/calibre/gui2/store/amazon_plugin.py
@ -131,16 +131,22 @@ class AmazonKindleStore(StorePlugin):
            # Amazon has two results pages.
            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            # Horizontal grid of books.
+            # Horizontal grid of books. Search "Paolo Bacigalupi"
            if is_shot:
                data_xpath = '//div[contains(@class, "result")]'
-                format_xpath = './/div[@class="productTitle"]/text()'
+                format_xpath = './/div[@class="productTitle"]//text()'
                asin_xpath = './/div[@class="productTitle"]//a'
                cover_xpath = './/div[@class="productTitle"]//img/@src'
-            # Vertical list of books.
+                title_xpath = './/div[@class="productTitle"]/a//text()'
                price_xpath = './/div[@class="newPrice"]/span/text()'
            # Vertical list of books. Search "martin"
            else:
-                data_xpath = '//div[@class="productData"]'
+                data_xpath = '//div[contains(@class, "results")]//div[contains(@class, "result")]'
-                format_xpath = './/span[@class="format"]/text()'
+                format_xpath = './/span[@class="binding"]//text()'
-                cover_xpath = '../div[@class="productImage"]/a/img/@src'
+                asin_xpath = './/div[@class="image"]/a[1]'
                cover_xpath = './/img[@class="productImage"]/@src'
                title_xpath = './/a[@class="title"]/text()'
                price_xpath = './/span[@class="price"]/text()'
            for data in doc.xpath(data_xpath):
                if counter <= 0:
@ -157,7 +163,7 @@ class AmazonKindleStore(StorePlugin):
                # We must have an asin otherwise we can't easily reference the
                # book later.
                asin_href = None
-                asin_a = data.xpath('.//div[@class="productTitle"]/a[1]')
+                asin_a = data.xpath(asin_xpath)
                if asin_a:
                    asin_href = asin_a[0].get('href', '')
                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
@ -170,13 +176,13 @@ class AmazonKindleStore(StorePlugin):
                cover_url = ''.join(data.xpath(cover_xpath))
-                title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()'))
+                title = ''.join(data.xpath(title_xpath))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                price = ''.join(data.xpath(price_xpath))
                if is_shot:
                    author = format.split(' by ')[-1]
                else:
-                    author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
+                    author = ''.join(data.xpath('.//span[@class="ptBrand"]/text()'))
                    author = author.split('by ')[-1]
                counter -= 1
--- a/src/calibre/gui2/store/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/amazon_uk_plugin.py
@ -15,17 +15,14 @@ from PyQt4.Qt import QUrl
 from calibre import browser
 from calibre.gui2 import open_url
-from calibre.gui2.store.amazon_plugin import AmazonKindleStore
+from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult
-class AmazonUKKindleStore(AmazonKindleStore):
+class AmazonUKKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
    details_url = 'http://amazon.co.uk/dp/'
    def open(self, parent=None, detail_item=None, external=False):
        aff_id = {'tag': 'calcharles-21'}
        store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id
@ -36,7 +33,8 @@ class AmazonUKKindleStore(AmazonKindleStore):
        open_url(QUrl(store_link))
    def search(self, query, max_results=10, timeout=60):
-        url =  self.search_url + urllib.quote_plus(query)
+        search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
        url =  search_url + urllib.quote_plus(query)
        br = browser()
        counter = max_results
@ -95,7 +93,9 @@ class AmazonUKKindleStore(AmazonKindleStore):
        if search_result.drm:
            return
-        url = self.details_url
+        url = 'http://amazon.co.uk/dp/'
        drm_search_text = u'Simultaneous Device Usage'
        drm_free_text = u'Unlimited'
        br = browser()
        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
@ -106,10 +106,10 @@ class AmazonUKKindleStore(AmazonKindleStore):
                if is_kindle:
                    search_result.formats = 'Kindle'
            if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                           self.drm_search_text + '")])'):
+                           drm_search_text + '")])'):
                if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                               self.drm_free_text + '") and contains(b, "' +
+                               drm_free_text + '") and contains(b, "' +
-                               self.drm_search_text + '")])'):
+                               drm_search_text + '")])'):
                    search_result.drm = SearchResult.DRM_UNLOCKED
                else:
                    search_result.drm = SearchResult.DRM_UNKNOWN
--- a/src/calibre/gui2/store/bn_plugin.py
+++ b/src/calibre/gui2/store/bn_plugin.py
@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
 import random
 import re
 import urllib
 from contextlib import closing
 from lxml import html
@ -47,26 +46,26 @@ class BNStore(BasicStoreConfig, StorePlugin):
            d.exec_()
    def search(self, query, max_results=10, timeout=60):
-        url = 'http://productsearch.barnesandnoble.com/search/results.aspx?STORE=EBOOK&SZE=%s&WRD=' % max_results
+        query = query.replace(' ', '-')
-        url += urllib.quote_plus(query)
+        url = 'http://www.barnesandnoble.com/s/%s?store=ebook&sze=%s' % (query, max_results)
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//ul[contains(@class, "wgt-search-results-display")]/li[contains(@class, "search-result-item") and contains(@class, "nook-result-item")]'):
+            for data in doc.xpath('//ul[contains(@class, "result-set")]/li[contains(@class, "result")]'):
                if counter <= 0:
                    break
-                id = ''.join(data.xpath('.//div[contains(@class, "wgt-product-image-module")]/a/@href'))
+                id = ''.join(data.xpath('.//div[contains(@class, "image")]/a/@href'))
                if not id:
                    continue
-                cover_url = ''.join(data.xpath('.//div[contains(@class, "wgt-product-image-module")]/a/img/@src'))
+                cover_url = ''.join(data.xpath('.//div[contains(@class, "image")]//img/@src'))
-                title = ''.join(data.xpath('.//span[@class="product-title"]/a/text()'))
+                title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
-                author = ', '.join(data.xpath('.//span[@class="contributers-line"]/a/text()'))
+                author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
-                price = ''.join(data.xpath('.//span[contains(@class, "onlinePriceValue2")]/text()'))
+                price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[@class="subtle"]/text()'))
                counter -= 1
@ -74,7 +73,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
-                s.price = price
+                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Nook'
--- a/src/calibre/gui2/store/declined.txt
+++ b/src/calibre/gui2/store/declined.txt
@ -2,7 +2,8 @@ This is a list of stores that objected, declined
 or asked not to be included in the store integration.
 * Borders (http://www.borders.com/)
-* WH Smith (http://www.whsmith.co.uk/)
+* Indigo (http://www.chapters.indigo.ca/)
  Refused to permit signing up for the affiliate program
 * Libreria Rizzoli (http://libreriarizzoli.corriere.it/).
  No reply with two attempts over 2 weeks
 * WH Smith (http://www.whsmith.co.uk/)
  Refused to permit signing up for the affiliate program
--- a/src/calibre/gui2/store/nexto_plugin.py
+++ b/src/calibre/gui2/store/nexto_plugin.py
@ -71,7 +71,7 @@ class NextoStore(BasicStoreConfig, StorePlugin):
                author = ''
                with closing(br.open('http://www.nexto.pl/' + id.strip(), timeout=timeout/4)) as nf:
                    idata = html.fromstring(nf.read())
-                    author = ''.join(idata.xpath('//div[@class="basic_data"]/p[1]/b/a/text()'))
+                    author = ', '.join(idata.xpath('//div[@class="basic_data"]/p[1]/b/a/text()'))
                counter -= 1
--- a/src/calibre/gui2/store/search/search.py
+++ b/src/calibre/gui2/store/search/search.py
@ -82,6 +82,8 @@ class SearchDialog(QDialog, Ui_Dialog):
        self.restore_state()
    def setup_store_checks(self):
        first_run = self.config.get('first_run', True)
        # Add check boxes for each store so the user
        # can disable searching specific stores on a
        # per search basis.
@ -98,7 +100,7 @@ class SearchDialog(QDialog, Ui_Dialog):
        icon = QIcon(I('donate.png'))
        for i, x in enumerate(sorted(self.gui.istores.keys(), key=lambda x: x.lower())):
            cbox = QCheckBox(x)
-            cbox.setChecked(existing.get(x, False))
+            cbox.setChecked(existing.get(x, first_run))
            store_list_layout.addWidget(cbox, i, 0, 1, 1)
            if self.gui.istores[x].base_plugin.affiliate:
                iw = QLabel(self)
@ -109,6 +111,8 @@ class SearchDialog(QDialog, Ui_Dialog):
        store_list_layout.setRowStretch(store_list_layout.rowCount(), 10)
        self.store_list.setWidget(stores_check_widget)
        self.config['first_run'] = False
    def build_adv_search(self):
        adv = AdvSearchBuilderDialog(self)
        if adv.exec_() == QDialog.Accepted:
--- a/src/calibre/gui2/store/zixo_plugin.py
+++ b/src/calibre/gui2/store/zixo_plugin.py
@ -0,0 +1,80 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
 __docformat__ = 'restructuredtext en'
 import re
 import urllib
 from contextlib import closing
 from lxml import html
 from PyQt4.Qt import QUrl
 from calibre import browser, url_slash_cleaner
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.basic_config import BasicStoreConfig
 from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class ZixoStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for zixo.pl: opens the shop, searches it and reads the
    available formats from a book's page.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store start page or a specific book page.

        :param parent: Parent passed to the embedded store dialog.
        :param detail_item: URL of a book page; when empty the store start
            page is used for the external browser.
        :param external: When True (or when the 'open_external' config
            value is set) the page is opened via open_url instead of the
            embedded WebStoreDialog.
        '''
        url = 'http://zixo.pl/e_ksiazki/start/'

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Search zixo.pl and yield one SearchResult per product found.

        :param query: Search text; encoded as UTF-8 and URL-quoted.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout handed to the browser when opening the
            results page.
        '''
        url = 'http://zixo.pl/wyszukiwarka/?search=' + urllib.quote(query.encode('utf-8')) + '&product_type=0'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="productInline"]'):
                if counter <= 0:
                    break

                # The product link doubles as the identifier; entries
                # without one cannot be opened later and are skipped.
                href = ''.join(data.xpath('.//a[@class="productThumb"]/@href'))
                if not href:
                    continue

                cover_url = ''.join(data.xpath('.//a[@class="productThumb"]/img/@src'))
                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
                author = ''.join(data.xpath('.//div[@class="productDescription"]/span[1]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="priceList"]/span/text()'))
                # Show the decimal separator as a comma; a plain string
                # replacement is enough, no regular expression needed.
                price = price.replace('.', ',')

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                # Strip the price like the other text fields.
                s.price = price.strip()
                s.detail_item = 'http://zixo.pl' + href.strip()
                s.drm = SearchResult.DRM_LOCKED

                yield s

    def get_details(self, search_result, timeout):
        '''
        Read the available formats from the book's page.

        :param search_result: Result whose detail_item URL is fetched; its
            formats attribute is updated in place.
        :param timeout: Timeout handed to the browser when opening the page.
        :return: Always True.
        '''
        br = browser()
        with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
            idata = html.fromstring(nf.read())
            formats = ''.join(idata.xpath('//ul[@class="prop"]/li[3]/text()'))
            # Drop any parenthesised remark and shorten the reader name
            # to the format label.
            formats = re.sub(r'\(.*\)', '', formats)
            formats = formats.replace('Zixo Reader', 'ZIXO')
            # Removing the parenthesised part can leave stray whitespace.
            search_result.formats = formats.strip()
        return True