Get Books: Updates to various store plugins to deal with website changes: Amazon Europe, Waterstones, Foyles, B&N, Kobo and Empik

2025-07-07 10:14:46 -04:00 · 2012-03-21 08:27:57 +05:30 · 2012-03-21 08:27:57 +05:30 · 2b60b652fa
commit 2b60b652fa
parent 931d46cd84 d0507f9010
13 changed files with 42 additions and 174 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1538,6 +1538,7 @@ class StoreWaterstonesUKStore(StoreBase):
    headquarters = 'UK'
    formats = ['EPUB', 'PDF']
    affiliate = True
 class StoreWeightlessBooksStore(StoreBase):
    name = 'Weightless Books'
@ -1557,15 +1558,6 @@ class StoreWHSmithUKStore(StoreBase):
    headquarters = 'UK'
    formats = ['EPUB', 'PDF']
 class StoreWizardsTowerBooksStore(StoreBase):
    # Registration entry describing the Wizards Tower Books store.
    name = 'Wizards Tower Books'
    # Kept as adjacent literals purely for line length; the pieces
    # concatenate to one unbroken description string.
    description = (
        u'A science fiction and fantasy publisher. '
        u'Concentrates mainly on making out-of-print works available once more as e-books, '
        u'and helping other small presses exploit the e-book market. '
        u'Also publishes a small number of limited-print-run anthologies '
        u'with a view to encouraging diversity in the science fiction and fantasy field.'
    )
    # "module.path:ClassName" reference to the class that implements the store.
    actual_plugin = 'calibre.gui2.store.stores.wizards_tower_books_plugin:WizardsTowerBooksStore'

    headquarters = 'UK'
    formats = ['EPUB', 'MOBI']
    drm_free_only = True
 class StoreWoblinkStore(StoreBase):
    name = 'Woblink'
    author = u'Tomasz Długosz'
@ -1636,7 +1628,6 @@ plugins += [
    StoreWaterstonesUKStore,
    StoreWeightlessBooksStore,
    StoreWHSmithUKStore,
    StoreWizardsTowerBooksStore,
    StoreWoblinkStore,
    XinXiiStore,
    StoreZixoStore
--- a/src/calibre/gui2/store/declined.txt
+++ b/src/calibre/gui2/store/declined.txt
@ -5,4 +5,3 @@ or asked not to be included in the store integration.
 * Indigo (http://www.chapters.indigo.ca/).
 * Libraria Rizzoli (http://libreriarizzoli.corriere.it/).
 * EPubBuy DE: reason: too much traffic for too few sales
 * Empik (http://empik.com.pl).
--- a/src/calibre/gui2/store/stores/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py
@ -41,7 +41,9 @@ class AmazonDEKindleStore(StorePlugin):
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            # Apparently amazon Europe is responding in UTF-8 now
            doc = html.fromstring(f.read())
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
@ -65,8 +67,8 @@ class AmazonDEKindleStore(StorePlugin):
                cover_url = ''.join(data.xpath(cover_xpath))
-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
                if author.startswith('von '):
--- a/src/calibre/gui2/store/stores/amazon_es_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py
@ -37,7 +37,9 @@ class AmazonESKindleStore(StorePlugin):
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            # Apparently amazon Europe is responding in UTF-8 now
            doc = html.fromstring(f.read())
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
@ -61,8 +63,8 @@ class AmazonESKindleStore(StorePlugin):
                cover_url = ''.join(data.xpath(cover_xpath))
-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                if author.startswith('de '):
                    author = author[3:]
--- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py
@ -39,7 +39,7 @@ class AmazonFRKindleStore(StorePlugin):
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon.fr is responding in UTF-8 now
+            # Apparently amazon Europe is responding in UTF-8 now
            doc = html.fromstring(f.read())
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
@ -64,8 +64,8 @@ class AmazonFRKindleStore(StorePlugin):
                cover_url = ''.join(data.xpath(cover_xpath))
-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                if author.startswith('de '):
                    author = author[3:]
--- a/src/calibre/gui2/store/stores/amazon_it_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py
@ -37,7 +37,9 @@ class AmazonITKindleStore(StorePlugin):
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            # Apparently amazon Europe is responding in UTF-8 now
            doc = html.fromstring(f.read())
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
@ -61,8 +63,8 @@ class AmazonITKindleStore(StorePlugin):
                cover_url = ''.join(data.xpath(cover_xpath))
-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                if author.startswith('di '):
                    author = author[3:]
--- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py
@ -38,7 +38,8 @@ class AmazonUKKindleStore(StorePlugin):
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # Apparently amazon Europe is responding in UTF-8 now
            doc = html.fromstring(f.read())
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
@ -62,8 +63,8 @@ class AmazonUKKindleStore(StorePlugin):
                cover_url = ''.join(data.xpath(cover_xpath))
-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
                if author.startswith('by '):
--- a/src/calibre/gui2/store/stores/bn_plugin.py
+++ b/src/calibre/gui2/store/stores/bn_plugin.py
@ -62,7 +62,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
                title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
                author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
-                price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[@class="subtle"]/text()'))
+                price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[contains(@class, "bn-price")]/text()'))
                counter -= 1
--- a/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py
+++ b/src/calibre/gui2/store/stores/diesel_ebooks_plugin.py
@ -7,7 +7,8 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import random
-import urllib2
+import re
 import urllib
 from contextlib import closing
 from lxml import html
@ -33,7 +34,7 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
        detail_url = None
        if detail_item:
-            detail_url = url + detail_item + aff_id
+            detail_url = detail_item + aff_id
        url = url + aff_id
        if external or self.config.get('open_external', False):
@ -45,33 +46,36 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
            d.exec_()
    def search(self, query, max_results=10, timeout=60):
-        url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib2.quote(query)
+        url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib.quote_plus(query)
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@class="item clearfix"]'):
+            for data in doc.xpath('//div[contains(@class, "item")]'):
                data = html.fromstring(html.tostring(data))
                if counter <= 0:
                    break
                id = ''.join(data.xpath('div[@class="cover"]/a/@href'))
                if not id or '/item/' not in id:
                    continue
                a, b, id = id.partition('/item/')
                cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src'))
-                title = ''.join(data.xpath('.//div[@class="content"]//h2/text()'))
+                title = ''.join(data.xpath('.//div[@class="content"]//h2/a/text()'))
-                author = ''.join(data.xpath('//div[@class="content"]//div[@class="author"]/a/text()'))
+                author = ''.join(data.xpath('.//div[@class="content"]/span//a/text()'))
                price = ''
-                price_elem = data.xpath('//td[@class="price"]/text()')
+                price_elem = data.xpath('.//div[@class="price_fat"]//h1/text()')
                if price_elem:
                    price = price_elem[0]
-                formats = ', '.join(data.xpath('.//td[@class="format"]/text()'))
+                formats = ', '.join(data.xpath('.//div[@class="book-info"]//text()')).strip()
                a, b, formats = formats.partition('Format:')
                drm = SearchResult.DRM_LOCKED
                if 'drm free' not in formats.lower():
                    drm = SearchResult.DRM_UNLOCKED
                counter -= 1
@ -80,19 +84,8 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
-                s.detail_item = '/item/' + id.strip()
+                s.detail_item = id.strip()
                s.formats = formats
                s.drm = drm
                yield s
    def get_details(self, search_result, timeout):
        """Refresh the DRM status of search_result from its item page.

        The item page is fetched from the '/item/' URL built from
        search_result.detail_item. The result is marked DRM_UNLOCKED only
        when the page's "format-info" table has a row whose header mentions
        "DRM" and whose cell contains "No"; otherwise it is DRM_LOCKED.

        Always returns True.
        """
        item_base = 'http://www.diesel-ebooks.com/item/'
        drm_free_test = 'boolean(//table[@class="format-info"]//tr[contains(th, "DRM") and contains(td, "No")])'

        fetcher = browser()
        with closing(fetcher.open(item_base + search_result.detail_item, timeout=timeout)) as response:
            page = html.fromstring(response.read())
            is_drm_free = page.xpath(drm_free_test)
            search_result.drm = SearchResult.DRM_UNLOCKED if is_drm_free else SearchResult.DRM_LOCKED
        return True
--- a/src/calibre/gui2/store/stores/foyles_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/foyles_uk_plugin.py
@ -60,10 +60,6 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
                    continue
                cover_url = ''.join(data.xpath('.//a[@class="Jacket"]/img/@src'))
                if cover_url:
                    cover_url = 'http://www.foyles.co.uk' + cover_url
                #print(cover_url)
                title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
                author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
                price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))
--- a/src/calibre/gui2/store/stores/kobo_plugin.py
+++ b/src/calibre/gui2/store/stores/kobo_plugin.py
@ -68,7 +68,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
                cover_url = ''.join(data.xpath('.//div[@class="SearchImageContainer"]//img[1]/@src'))
                title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()'))
-                author = ''.join(data.xpath('.//div[@class="SCItemSummary"]/span/a[1]/text()'))
+                author = ', '.join(data.xpath('.//div[@class="SCItemSummary"]//span//a/text()'))
                drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and contains(text(), "DRM")])')
                counter -= 1
--- a/src/calibre/gui2/store/stores/waterstones_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/waterstones_uk_plugin.py
@ -57,7 +57,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
                cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src'))
                title = ''.join(data.xpath('./div/div/h2/a/text()'))
                author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()'))
-                price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceStandard"]/text()'))
+                price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceRed2"]/text()'))
                drm = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "DRM")])')
                pdf = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "PDF")])')
                epub = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "EPUB")])')
--- a/src/calibre/gui2/store/stores/wizards_tower_books_plugin.py
+++ b/src/calibre/gui2/store/stores/wizards_tower_books_plugin.py
@ -1,118 +0,0 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import urllib
 from contextlib import closing
 from lxml import html
 from PyQt4.Qt import QUrl
 from calibre import browser, url_slash_cleaner
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.basic_config import BasicStoreConfig
 from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
    """Store plugin that searches and opens the Wizards Tower Books site.

    Every result produced by search() is reported as DRM free.
    """

    # Base address; detail items are stored relative to it.
    url = 'http://www.wizardstowerbooks.com/'

    def open(self, parent=None, detail_item=None, external=False):
        """Open the store, or the page of one item, in a browser.

        detail_item is a path relative to the store URL, as produced by
        search(); when it is empty the store front page is opened.
        The system browser is used if external is true or the
        'open_external' config option is set; otherwise the page is shown
        in a WebStoreDialog.
        """
        if detail_item:
            detail_item = self.url + detail_item

        if external or self.config.get('open_external', False):
            # With no item selected there is no item URL to clean, so fall
            # back to the store front page instead of passing None along.
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else self.url)))
        else:
            d = WebStoreDialog(self.gui, self.url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        """Yield SearchResult objects for query.

        At most max_results entries are yielded from a results listing.
        If the final URL no longer contains 'search.html', the response is
        treated as a single book's detail page and one result is yielded.
        """
        url = 'http://www.wizardstowerbooks.com/search.html?for=' + urllib.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            if 'search.html' in f.geturl():
                for data in doc.xpath('//table[@class="gridp"]//td'):
                    if counter <= 0:
                        break

                    item_href = ''.join(data.xpath('.//span[@class="prti"]/a/@href')).strip()
                    if not item_href:
                        # Cell without a product link; nothing to report.
                        continue

                    cover_url = ''.join(data.xpath('.//div[@class="prim"]/a/img/@src'))
                    cover_url = url_slash_cleaner(self.url + cover_url.strip())

                    price = ''.join(data.xpath('.//font[@class="selling_price"]//text()')).strip()
                    if not price:
                        # Entries without a selling price are skipped.
                        continue

                    title = ''.join(data.xpath('.//span[@class="prti"]/a/b/text()'))
                    author = ''.join(data.xpath('.//p[@class="last"]/text()'))
                    # Keep only what follows ' by '; empty when it is absent.
                    author = author.partition(' by ')[2]

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = cover_url
                    s.title = title.strip()
                    s.author = author.strip()
                    s.price = price
                    s.detail_item = item_href
                    s.drm = SearchResult.DRM_UNLOCKED

                    yield s
            else:
                # Exact match brought us to the book's detail page.
                s = SearchResult()

                cover_url = ''.join(doc.xpath('//div[@id="image"]/a/img[@title="Zoom"]/@src')).strip()
                s.cover_url = url_slash_cleaner(self.url + cover_url)
                s.title = ''.join(doc.xpath('//form[@name="details"]/h1/text()')).strip()

                texts = doc.xpath('//p[contains(., "Author:")]//text()')
                for i, text in enumerate(texts):
                    if 'author' in text.lower():
                        # The name is taken from the text node that follows
                        # the first node mentioning "author".
                        if len(texts) > i + 1:
                            s.author = texts[i + 1].replace(u'\xa0', '').strip()
                        break

                s.price = ''.join(doc.xpath('//span[@id="price_selling"]//text()')).strip()
                s.detail_item = f.geturl().replace(self.url, '').strip()
                # Upper-cased to agree with get_details(), which never runs
                # for this result because formats is already filled in.
                s.formats = ', '.join(doc.xpath('//select[@id="N1_"]//option//text()')).upper()
                s.drm = SearchResult.DRM_UNLOCKED

                yield s

    def get_details(self, search_result, timeout):
        """Fill in search_result.formats from the item's detail page.

        Returns False without fetching anything when formats is already
        set; otherwise fetches the page, stores the upper-cased option
        texts of the "N1_" select element, and returns True.
        """
        if search_result.formats:
            return False

        br = browser()
        with closing(br.open(url_slash_cleaner(self.url + search_result.detail_item), timeout=timeout)) as nf:
            idata = html.fromstring(nf.read())

            formats = ', '.join(idata.xpath('//select[@id="N1_"]//option//text()'))
            search_result.formats = formats.upper()

        return True