Get Books: Update plugins for Amazon and B&N stores to handle website changes. Enable some stores by default on first run. Add Zixo store

2025-07-09 03:04:10 -04:00 · 2011-06-06 17:34:14 -06:00 · 2011-06-06 17:34:14 -06:00 · c54ee1d6a8
commit c54ee1d6a8
parent 7f7d900fef fff3cb147a
9 changed files with 238 additions and 45 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1418,6 +1418,15 @@ class StoreWoblinkStore(StoreBase):
    headquarters = 'PL'
    formats = ['EPUB']

+class StoreZixoStore(StoreBase):
+    name = 'Zixo'
+    author = u'Tomasz Długosz'
+    description = u'Księgarnia z ebookami oraz książkami audio. Aby otwierać książki w formacie Zixo należy zainstalować program dostępny na stronie księgarni. Umożliwia on m.in. dodawanie zakładek i dostosowywanie rozmiaru czcionki.'
+    actual_plugin = 'calibre.gui2.store.zixo_plugin:ZixoStore'
+
+    headquarters = 'PL'
+    formats = ['PDF', 'ZIXO']  # one list element per format, not a single comma-joined string
+
 plugins += [
    StoreArchiveOrgStore,
    StoreAmazonKindleStore,
@ -1452,7 +1461,8 @@ plugins += [
    StoreWeightlessBooksStore,
    StoreWHSmithUKStore,
    StoreWizardsTowerBooksStore,
-    StoreWoblinkStore
+    StoreWoblinkStore,
+    StoreZixoStore
 ]

 # }}}
--- a/src/calibre/gui2/store/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/amazon_de_plugin.py
@ -6,21 +6,23 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

+import re, urllib
+from contextlib import closing
+
+from lxml import html
+
 from PyQt4.Qt import QUrl

+from calibre import browser
 from calibre.gui2 import open_url
-from calibre.gui2.store.amazon_plugin import AmazonKindleStore
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.search_result import SearchResult

-class AmazonDEKindleStore(AmazonKindleStore):
+class AmazonDEKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

-    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
-    details_url = 'http://amazon.de/dp/'
-    drm_search_text = u'Gleichzeitige Verwendung von Geräten'
-    drm_free_text = u'Keine Einschränkung'
-
    def open(self, parent=None, detail_item=None, external=False):
        aff_id = {'tag': 'charhale0a-21'}
        store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
@ -32,3 +34,94 @@ class AmazonDEKindleStore(AmazonKindleStore):
                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') % aff_id
        open_url(QUrl(store_link))
+
+    def search(self, query, max_results=10, timeout=60):
+        search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
+        url =  search_url + urllib.quote_plus(query)
+        br = browser()
+
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read())
+
+            # Amazon has two results pages.
+            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
+            # Horizontal grid of books.
+            if is_shot:
+                data_xpath = '//div[contains(@class, "result")]'
+                format_xpath = './/div[@class="productTitle"]/text()'
+                cover_xpath = './/div[@class="productTitle"]//img/@src'
+            # Vertical list of books.
+            else:
+                data_xpath = '//div[@class="productData"]'
+                format_xpath = './/span[@class="format"]/text()'
+                cover_xpath = '../div[@class="productImage"]/a/img/@src'
+
+            for data in doc.xpath(data_xpath):
+                if counter <= 0:
+                    break
+
+                # Even though we are searching digital-text only Amazon will still
+                # put in results for non Kindle books (author pages). So we need
+                # to explicitly check if the item is a Kindle book and ignore it
+                # if it isn't.
+                format = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format.lower():
+                    continue
+
+                # We must have an asin otherwise we can't easily reference the
+                # book later.
+                asin_href = None
+                asin_a = data.xpath('.//div[@class="productTitle"]/a[1]')
+                if asin_a:
+                    asin_href = asin_a[0].get('href', '')
+                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
+                    if m:
+                        asin = m.group('asin')
+                    else:
+                        continue
+                else:
+                    continue
+
+                cover_url = ''.join(data.xpath(cover_xpath))
+
+                title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()'))
+                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+
+                if is_shot:
+                    author = format.split(' von ')[-1]
+                else:
+                    author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
+                    author = author.split(' von ')[-1]
+
+                counter -= 1
+
+                s = SearchResult()
+                s.cover_url = cover_url.strip()
+                s.title = title.strip()
+                s.author = author.strip()
+                s.price = price.strip()
+                s.detail_item = asin.strip()
+                s.formats = 'Kindle'
+
+                yield s
+
+    def get_details(self, search_result, timeout):
+        drm_search_text = u'Gleichzeitige Verwendung von Geräten'
+        drm_free_text = u'Keine Einschränkung'
+        url = 'http://amazon.de/dp/'
+
+        br = browser()
+        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
+            idata = html.fromstring(nf.read())
+            if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
+                           drm_search_text + '")])'):
+                if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
+                               drm_free_text + '") and contains(b, "' +
+                               drm_search_text + '")])'):
+                    search_result.drm = SearchResult.DRM_UNLOCKED
+                else:
+                    search_result.drm = SearchResult.DRM_UNKNOWN
+            else:
+                search_result.drm = SearchResult.DRM_LOCKED
+        return True
--- a/src/calibre/gui2/store/amazon_plugin.py
+++ b/src/calibre/gui2/store/amazon_plugin.py
@ -131,16 +131,22 @@ class AmazonKindleStore(StorePlugin):
            
            # Amazon has two results pages.
            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            # Horizontal grid of books.
+            # Horizontal grid of books. Search "Paolo Bacigalupi"
            if is_shot:
                data_xpath = '//div[contains(@class, "result")]'
-                format_xpath = './/div[@class="productTitle"]/text()'
+                format_xpath = './/div[@class="productTitle"]//text()'
+                asin_xpath = './/div[@class="productTitle"]//a'
                cover_xpath = './/div[@class="productTitle"]//img/@src'
-            # Vertical list of books.
+                title_xpath = './/div[@class="productTitle"]/a//text()'
+                price_xpath = './/div[@class="newPrice"]/span/text()'
+            # Vertical list of books. Search "martin"
            else:
-                data_xpath = '//div[@class="productData"]'
-                format_xpath = './/span[@class="format"]/text()'
-                cover_xpath = '../div[@class="productImage"]/a/img/@src'
+                data_xpath = '//div[contains(@class, "results")]//div[contains(@class, "result")]'
+                format_xpath = './/span[@class="binding"]//text()'
+                asin_xpath = './/div[@class="image"]/a[1]'
+                cover_xpath = './/img[@class="productImage"]/@src'
+                title_xpath = './/a[@class="title"]/text()'
+                price_xpath = './/span[@class="price"]/text()'
            
            for data in doc.xpath(data_xpath):
                if counter <= 0:
@ -157,7 +163,7 @@ class AmazonKindleStore(StorePlugin):
                # We must have an asin otherwise we can't easily reference the
                # book later.
                asin_href = None
-                asin_a = data.xpath('.//div[@class="productTitle"]/a[1]')
+                asin_a = data.xpath(asin_xpath)
                if asin_a:
                    asin_href = asin_a[0].get('href', '')
                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
@ -170,14 +176,14 @@ class AmazonKindleStore(StorePlugin):
                
                cover_url = ''.join(data.xpath(cover_xpath))

-                title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                title = ''.join(data.xpath(title_xpath))
+                price = ''.join(data.xpath(price_xpath))
                
                if is_shot:
                    author = format.split(' by ')[-1]
                else:
-                    author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
-                    author = author.split(' by ')[-1]
+                    author = ''.join(data.xpath('.//span[@class="ptBrand"]/text()'))
+                    author = author.split('by ')[-1]
                
                counter -= 1
    
--- a/src/calibre/gui2/store/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/amazon_uk_plugin.py
@ -15,17 +15,14 @@ from PyQt4.Qt import QUrl

 from calibre import browser
 from calibre.gui2 import open_url
-from calibre.gui2.store.amazon_plugin import AmazonKindleStore
+from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult

-class AmazonUKKindleStore(AmazonKindleStore):
+class AmazonUKKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

-    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
-    details_url = 'http://amazon.co.uk/dp/'
-
    def open(self, parent=None, detail_item=None, external=False):
        aff_id = {'tag': 'calcharles-21'}
        store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id
@ -36,7 +33,8 @@ class AmazonUKKindleStore(AmazonKindleStore):
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
-        url =  self.search_url + urllib.quote_plus(query)
+        search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
+        url =  search_url + urllib.quote_plus(query)
        br = browser()

        counter = max_results
@ -95,7 +93,9 @@ class AmazonUKKindleStore(AmazonKindleStore):
        if search_result.drm:
            return

-        url = self.details_url
+        url = 'http://amazon.co.uk/dp/'
+        drm_search_text = u'Simultaneous Device Usage'
+        drm_free_text = u'Unlimited'

        br = browser()
        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
@ -106,10 +106,10 @@ class AmazonUKKindleStore(AmazonKindleStore):
                if is_kindle:
                    search_result.formats = 'Kindle'
            if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                           self.drm_search_text + '")])'):
+                           drm_search_text + '")])'):
                if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                               self.drm_free_text + '") and contains(b, "' +
-                               self.drm_search_text + '")])'):
+                               drm_free_text + '") and contains(b, "' +
+                               drm_search_text + '")])'):
                    search_result.drm = SearchResult.DRM_UNLOCKED
                else:
                    search_result.drm = SearchResult.DRM_UNKNOWN
--- a/src/calibre/gui2/store/bn_plugin.py
+++ b/src/calibre/gui2/store/bn_plugin.py
@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'

 import random
 import re
-import urllib
 from contextlib import closing

 from lxml import html
@ -47,26 +46,26 @@ class BNStore(BasicStoreConfig, StorePlugin):
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
-        url = 'http://productsearch.barnesandnoble.com/search/results.aspx?STORE=EBOOK&SZE=%s&WRD=' % max_results
-        url += urllib.quote_plus(query)
+        query = query.replace(' ', '-')
+        url = 'http://www.barnesandnoble.com/s/%s?store=ebook&sze=%s' % (query, max_results)
        
        br = browser()
        
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//ul[contains(@class, "wgt-search-results-display")]/li[contains(@class, "search-result-item") and contains(@class, "nook-result-item")]'):
+            for data in doc.xpath('//ul[contains(@class, "result-set")]/li[contains(@class, "result")]'):
                if counter <= 0:
                    break
                
-                id = ''.join(data.xpath('.//div[contains(@class, "wgt-product-image-module")]/a/@href'))
+                id = ''.join(data.xpath('.//div[contains(@class, "image")]/a/@href'))
                if not id:
                    continue
-                cover_url = ''.join(data.xpath('.//div[contains(@class, "wgt-product-image-module")]/a/img/@src'))
+                cover_url = ''.join(data.xpath('.//div[contains(@class, "image")]//img/@src'))
                
-                title = ''.join(data.xpath('.//span[@class="product-title"]/a/text()'))
-                author = ', '.join(data.xpath('.//span[@class="contributers-line"]/a/text()'))
-                price = ''.join(data.xpath('.//span[contains(@class, "onlinePriceValue2")]/text()'))
+                title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
+                author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
+                price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[@class="subtle"]/text()'))
                
                counter -= 1
                
@ -74,7 +73,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
-                s.price = price
+                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Nook'
--- a/src/calibre/gui2/store/declined.txt
+++ b/src/calibre/gui2/store/declined.txt
@ -2,7 +2,8 @@ This is a list of stores that objected, declined
 or asked not to be included in the store integration.

 * Borders (http://www.borders.com/)
-* WH Smith (http://www.whsmith.co.uk/)
-  Refused to permit signing up for the affiliate program
+* Indigo (http://www.chapters.indigo.ca/)
 * Libraria Rizzoli (http://libreriarizzoli.corriere.it/).
  No reply with two attempts over 2 weeks
+* WH Smith (http://www.whsmith.co.uk/)
+  Refused to permit signing up for the affiliate program
--- a/src/calibre/gui2/store/nexto_plugin.py
+++ b/src/calibre/gui2/store/nexto_plugin.py
@ -71,7 +71,7 @@ class NextoStore(BasicStoreConfig, StorePlugin):
                author = ''
                with closing(br.open('http://www.nexto.pl/' + id.strip(), timeout=timeout/4)) as nf:
                    idata = html.fromstring(nf.read())
-                    author = ''.join(idata.xpath('//div[@class="basic_data"]/p[1]/b/a/text()'))
+                    author = ', '.join(idata.xpath('//div[@class="basic_data"]/p[1]/b/a/text()'))

                counter -= 1

--- a/src/calibre/gui2/store/search/search.py
+++ b/src/calibre/gui2/store/search/search.py
@ -82,6 +82,8 @@ class SearchDialog(QDialog, Ui_Dialog):
        self.restore_state()

    def setup_store_checks(self):
+        first_run = self.config.get('first_run', True)
+        
        # Add check boxes for each store so the user
        # can disable searching specific stores on a
        # per search basis.
@ -98,7 +100,7 @@ class SearchDialog(QDialog, Ui_Dialog):
        icon = QIcon(I('donate.png'))
        for i, x in enumerate(sorted(self.gui.istores.keys(), key=lambda x: x.lower())):
            cbox = QCheckBox(x)
-            cbox.setChecked(existing.get(x, False))
+            cbox.setChecked(existing.get(x, first_run))
            store_list_layout.addWidget(cbox, i, 0, 1, 1)
            if self.gui.istores[x].base_plugin.affiliate:
                iw = QLabel(self)
@ -109,6 +111,8 @@ class SearchDialog(QDialog, Ui_Dialog):
        store_list_layout.setRowStretch(store_list_layout.rowCount(), 10)
        self.store_list.setWidget(stores_check_widget)
        
+        self.config['first_run'] = False
+
    def build_adv_search(self):
        adv = AdvSearchBuilderDialog(self)
        if adv.exec_() == QDialog.Accepted:
--- a/src/calibre/gui2/store/zixo_plugin.py
+++ b/src/calibre/gui2/store/zixo_plugin.py
@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
+__docformat__ = 'restructuredtext en'
+
+import re
+import urllib
+from contextlib import closing
+
+from lxml import html
+
+from PyQt4.Qt import QUrl
+
+from calibre import browser, url_slash_cleaner
+from calibre.gui2 import open_url
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.basic_config import BasicStoreConfig
+from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.web_store_dialog import WebStoreDialog
+
+class ZixoStore(BasicStoreConfig, StorePlugin):
+
+    def open(self, parent=None, detail_item=None, external=False):
+
+        url = 'http://zixo.pl/e_ksiazki/start/'
+
+        if external or self.config.get('open_external', False):
+            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
+        else:
+            d = WebStoreDialog(self.gui, url, parent, detail_item)
+            d.setWindowTitle(self.name)
+            d.set_tags(self.config.get('tags', ''))
+            d.exec_()
+
+    def search(self, query, max_results=10, timeout=60):
+        url = 'http://zixo.pl/wyszukiwarka/?search=' + urllib.quote(query.encode('utf-8')) + '&product_type=0'
+
+        br = browser()
+
+        counter = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read())
+            for data in doc.xpath('//div[@class="productInline"]'):
+                if counter <= 0:
+                    break
+
+                id = ''.join(data.xpath('.//a[@class="productThumb"]/@href'))
+                if not id:
+                    continue
+
+                cover_url = ''.join(data.xpath('.//a[@class="productThumb"]/img/@src'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
+                author = ''.join(data.xpath('.//div[@class="productDescription"]/span[1]/a/text()'))
+                price = ''.join(data.xpath('.//div[@class="priceList"]/span/text()'))
+                price = re.sub('\.', ',', price)
+
+                counter -= 1
+
+                s = SearchResult()
+                s.cover_url = cover_url
+                s.title = title.strip()
+                s.author = author.strip()
+                s.price = price
+                s.detail_item = 'http://zixo.pl' + id.strip()
+                s.drm = SearchResult.DRM_LOCKED
+
+                yield s
+
+    def get_details(self, search_result, timeout):
+        br = browser()
+        with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
+            idata = html.fromstring(nf.read())
+            formats = ''.join(idata.xpath('//ul[@class="prop"]/li[3]/text()'))
+            formats = re.sub(r'\(.*\)', '', formats)
+            formats = re.sub('Zixo Reader', 'ZIXO', formats)
+            search_result.formats = formats
+        return True