Get Books: Updates to various store plugins to deal with website changes: Amazon Europe, Waterstones, Foyles, B&N, Kobo and Empik

This commit is contained in:
Kovid Goyal 2012-03-21 08:27:57 +05:30
commit 2b60b652fa
13 changed files with 42 additions and 174 deletions

View File

@ -1538,6 +1538,7 @@ class StoreWaterstonesUKStore(StoreBase):
headquarters = 'UK'
formats = ['EPUB', 'PDF']
affiliate = True
class StoreWeightlessBooksStore(StoreBase):
name = 'Weightless Books'
@ -1557,15 +1558,6 @@ class StoreWHSmithUKStore(StoreBase):
headquarters = 'UK'
formats = ['EPUB', 'PDF']
class StoreWizardsTowerBooksStore(StoreBase):
    # Registration entry for the Wizards Tower Books store plugin.

    # Human-readable identity of the store.
    name = 'Wizards Tower Books'
    description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'

    # "module:class" location of the class that implements the store.
    actual_plugin = 'calibre.gui2.store.stores.wizards_tower_books_plugin:WizardsTowerBooksStore'

    # Catalogue properties advertised for this store.
    headquarters = 'UK'
    formats = ['EPUB', 'MOBI']
    drm_free_only = True
class StoreWoblinkStore(StoreBase):
name = 'Woblink'
author = u'Tomasz Długosz'
@ -1636,7 +1628,6 @@ plugins += [
StoreWaterstonesUKStore,
StoreWeightlessBooksStore,
StoreWHSmithUKStore,
StoreWizardsTowerBooksStore,
StoreWoblinkStore,
XinXiiStore,
StoreZixoStore

View File

@ -5,4 +5,3 @@ or asked not to be included in the store integration.
* Indigo (http://www.chapters.indigo.ca/).
* Libreria Rizzoli (http://libreriarizzoli.corriere.it/).
* EPubBuy DE: reason: too much traffic for too few sales
* Empik (http://empik.com.pl).

View File

@ -41,7 +41,9 @@ class AmazonDEKindleStore(StorePlugin):
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
@ -65,8 +67,8 @@ class AmazonDEKindleStore(StorePlugin):
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
if author.startswith('von '):

View File

@ -37,7 +37,9 @@ class AmazonESKindleStore(StorePlugin):
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
@ -61,8 +63,8 @@ class AmazonESKindleStore(StorePlugin):
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
if author.startswith('de '):
author = author[3:]

View File

@ -39,7 +39,7 @@ class AmazonFRKindleStore(StorePlugin):
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon.fr is responding in UTF-8 now
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
@ -64,8 +64,8 @@ class AmazonFRKindleStore(StorePlugin):
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
if author.startswith('de '):
author = author[3:]

View File

@ -37,7 +37,9 @@ class AmazonITKindleStore(StorePlugin):
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
@ -61,8 +63,8 @@ class AmazonITKindleStore(StorePlugin):
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
if author.startswith('di '):
author = author[3:]

View File

@ -38,7 +38,8 @@ class AmazonUKKindleStore(StorePlugin):
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
@ -62,8 +63,8 @@ class AmazonUKKindleStore(StorePlugin):
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
if author.startswith('by '):

View File

@ -62,7 +62,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[@class="subtle"]/text()'))
price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[contains(@class, "bn-price")]/text()'))
counter -= 1

View File

@ -7,7 +7,8 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import random
import urllib2
import re
import urllib
from contextlib import closing
from lxml import html
@ -33,7 +34,7 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
detail_url = None
if detail_item:
detail_url = url + detail_item + aff_id
detail_url = detail_item + aff_id
url = url + aff_id
if external or self.config.get('open_external', False):
@ -45,33 +46,36 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib2.quote(query)
url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib.quote_plus(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//div[@class="item clearfix"]'):
data = html.fromstring(html.tostring(data))
for data in doc.xpath('//div[contains(@class, "item")]'):
if counter <= 0:
break
id = ''.join(data.xpath('div[@class="cover"]/a/@href'))
if not id or '/item/' not in id:
continue
a, b, id = id.partition('/item/')
cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src'))
title = ''.join(data.xpath('.//div[@class="content"]//h2/text()'))
author = ''.join(data.xpath('//div[@class="content"]//div[@class="author"]/a/text()'))
title = ''.join(data.xpath('.//div[@class="content"]//h2/a/text()'))
author = ''.join(data.xpath('.//div[@class="content"]/span//a/text()'))
price = ''
price_elem = data.xpath('//td[@class="price"]/text()')
price_elem = data.xpath('.//div[@class="price_fat"]//h1/text()')
if price_elem:
price = price_elem[0]
formats = ', '.join(data.xpath('.//td[@class="format"]/text()'))
formats = ', '.join(data.xpath('.//div[@class="book-info"]//text()')).strip()
a, b, formats = formats.partition('Format:')
drm = SearchResult.DRM_LOCKED
if 'drm free' not in formats.lower():
drm = SearchResult.DRM_UNLOCKED
counter -= 1
@ -80,19 +84,8 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = '/item/' + id.strip()
s.detail_item = id.strip()
s.formats = formats
s.drm = drm
yield s
def get_details(self, search_result, timeout):
    '''
    Fetch the item's detail page and set ``search_result.drm`` from it.

    The item is marked DRM_UNLOCKED when the page's "format-info" table
    has a row whose header cell contains "DRM" and whose data cell
    contains "No"; otherwise it is marked DRM_LOCKED.

    :param search_result: result whose ``detail_item`` is appended to the
        store's item URL; its ``drm`` attribute is updated in place.
    :param timeout: timeout passed to the browser when opening the page.
    :return: always True.
    '''
    base_url = 'http://www.diesel-ebooks.com/item/'
    drm_free_xpath = 'boolean(//table[@class="format-info"]//tr[contains(th, "DRM") and contains(td, "No")])'

    br = browser()
    with closing(br.open(base_url + search_result.detail_item, timeout=timeout)) as nf:
        page = html.fromstring(nf.read())
        is_drm_free = page.xpath(drm_free_xpath)
        search_result.drm = SearchResult.DRM_UNLOCKED if is_drm_free else SearchResult.DRM_LOCKED
    return True

View File

@ -60,10 +60,6 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
continue
cover_url = ''.join(data.xpath('.//a[@class="Jacket"]/img/@src'))
if cover_url:
cover_url = 'http://www.foyles.co.uk' + cover_url
#print(cover_url)
title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))

View File

@ -68,7 +68,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
cover_url = ''.join(data.xpath('.//div[@class="SearchImageContainer"]//img[1]/@src'))
title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()'))
author = ''.join(data.xpath('.//div[@class="SCItemSummary"]/span/a[1]/text()'))
author = ', '.join(data.xpath('.//div[@class="SCItemSummary"]//span//a/text()'))
drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and contains(text(), "DRM")])')
counter -= 1

View File

@ -57,7 +57,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src'))
title = ''.join(data.xpath('./div/div/h2/a/text()'))
author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()'))
price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceStandard"]/text()'))
price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceRed2"]/text()'))
drm = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "DRM")])')
pdf = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "PDF")])')
epub = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "EPUB")])')

View File

@ -1,118 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for Wizards Tower Books (http://www.wizardstowerbooks.com/).

    Every result produced by this plugin is marked DRM_UNLOCKED.
    '''

    # Base URL of the store; detail items and cover paths are relative to it.
    url = 'http://www.wizardstowerbooks.com/'

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store, or a specific item of it.

        :param parent: parent widget handed to the embedded store dialog.
        :param detail_item: path of an item relative to ``self.url``, or
            None to open the store itself.
        :param external: when True (or when the 'open_external' config
            option is set) the page is opened with ``open_url`` instead of
            the embedded ``WebStoreDialog``.
        '''
        if detail_item:
            detail_item = self.url + detail_item

        if external or self.config.get('open_external', False):
            # Without a specific item there is nothing but the store front
            # page to show; never hand None to url_slash_cleaner.
            target = detail_item if detail_item else self.url
            open_url(QUrl(url_slash_cleaner(target)))
        else:
            d = WebStoreDialog(self.gui, self.url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Search the store and yield ``SearchResult`` objects.

        If the site answers with its search page, at most ``max_results``
        entries of the result grid are yielded. If the site redirected to
        another page (an exact match leads to the book's detail page),
        a single result built from that page is yielded.

        :param query: text to search for.
        :param max_results: maximum number of grid entries to yield.
        :param timeout: timeout passed to the browser when opening the page.
        '''
        # Python 2's urllib.quote raises KeyError for unicode strings with
        # non-ASCII characters, so quote the encoded bytes instead.
        # NOTE(review): assumes the site accepts UTF-8 encoded queries.
        if not isinstance(query, bytes):
            query = query.encode('utf-8')
        url = 'http://www.wizardstowerbooks.com/search.html?for=' + urllib.quote(query)

        br = browser()
        # Read everything needed from the response up front so it is not
        # kept open while the consumer iterates over this generator.
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            final_url = f.geturl()

        if 'search.html' in final_url:
            for result in self._parse_result_grid(doc, max_results):
                yield result
        else:
            # Exact match brought us to the book's detail page.
            yield self._parse_detail_page(doc, final_url)

    def _parse_result_grid(self, doc, max_results):
        '''Yield up to ``max_results`` results from the search result grid.'''
        counter = max_results
        for data in doc.xpath('//table[@class="gridp"]//td'):
            if counter <= 0:
                break

            # Cells without a link or without a price are skipped.
            detail_item = ''.join(data.xpath('.//span[@class="prti"]/a/@href')).strip()
            if not detail_item:
                continue
            price = ''.join(data.xpath('.//font[@class="selling_price"]//text()')).strip()
            if not price:
                continue

            cover_url = ''.join(data.xpath('.//div[@class="prim"]/a/img/@src'))
            title = ''.join(data.xpath('.//span[@class="prti"]/a/b/text()'))
            # Keep only what follows ' by ' in the last paragraph's text.
            author = ''.join(data.xpath('.//p[@class="last"]/text()')).partition(' by ')[2]

            counter -= 1

            s = SearchResult()
            s.cover_url = url_slash_cleaner(self.url + cover_url.strip())
            s.title = title.strip()
            s.author = author.strip()
            s.price = price
            s.detail_item = detail_item
            s.drm = SearchResult.DRM_UNLOCKED
            yield s

    def _parse_detail_page(self, doc, page_url):
        '''Build a single result from a book detail page found at ``page_url``.'''
        s = SearchResult()

        cover_url = ''.join(doc.xpath('//div[@id="image"]/a/img[@title="Zoom"]/@src')).strip()
        s.cover_url = url_slash_cleaner(self.url + cover_url)
        s.title = ''.join(doc.xpath('//form[@name="details"]/h1/text()')).strip()

        # The author name is the text node following the one that
        # contains the word "author".
        authors = doc.xpath('//p[contains(., "Author:")]//text()')
        author_index = None
        for i, text in enumerate(authors):
            if 'author' in text.lower():
                author_index = i + 1
                break
        if author_index is not None and len(authors) > author_index:
            s.author = authors[author_index].replace(u'\xa0', '').strip()

        s.price = ''.join(doc.xpath('//span[@id="price_selling"]//text()')).strip()
        s.detail_item = page_url.replace(self.url, '').strip()
        # Upper-cased so the value matches what get_details() stores.
        s.formats = ', '.join(doc.xpath('//select[@id="N1_"]//option//text()')).upper()
        s.drm = SearchResult.DRM_UNLOCKED
        return s

    def get_details(self, search_result, timeout):
        '''
        Fill in ``search_result.formats`` from the item's detail page.

        :param search_result: result to update in place; its
            ``detail_item`` is resolved relative to ``self.url``.
        :param timeout: timeout passed to the browser when opening the page.
        :return: False if the result already has formats (nothing is
            fetched), True after the formats have been set.
        '''
        if search_result.formats:
            return False

        br = browser()
        detail_url = url_slash_cleaner(self.url + search_result.detail_item)
        with closing(br.open(detail_url, timeout=timeout)) as nf:
            idata = html.fromstring(nf.read())

        formats = ', '.join(idata.xpath('//select[@id="N1_"]//option//text()'))
        search_result.formats = formats.upper()
        return True