Get Books: Update plugins for Amazon and B&N stores to handle website changes. Enable some stores by default on first run. Add Zixo store

This commit is contained in:
Kovid Goyal 2011-06-06 17:34:14 -06:00
commit c54ee1d6a8
9 changed files with 238 additions and 45 deletions

View File

@ -1418,6 +1418,15 @@ class StoreWoblinkStore(StoreBase):
headquarters = 'PL' headquarters = 'PL'
formats = ['EPUB'] formats = ['EPUB']
class StoreZixoStore(StoreBase):
    # Registration metadata for the Zixo store; the implementation is
    # loaded from the module named in actual_plugin.
    name = 'Zixo'
    author = u'Tomasz Długosz'
    description = u'Księgarnia z ebookami oraz książkami audio. Aby otwierać książki w formacie Zixo należy zainstalować program dostępny na stronie księgarni. Umożliwia on m.in. dodawanie zakładek i dostosowywanie rozmiaru czcionki.'
    actual_plugin = 'calibre.gui2.store.zixo_plugin:ZixoStore'
    headquarters = 'PL'
    # Each format is its own list entry (previously a single
    # 'PDF, ZIXO' string, which is one element rather than two formats).
    formats = ['PDF', 'ZIXO']
plugins += [ plugins += [
StoreArchiveOrgStore, StoreArchiveOrgStore,
StoreAmazonKindleStore, StoreAmazonKindleStore,
@ -1452,7 +1461,8 @@ plugins += [
StoreWeightlessBooksStore, StoreWeightlessBooksStore,
StoreWHSmithUKStore, StoreWHSmithUKStore,
StoreWizardsTowerBooksStore, StoreWizardsTowerBooksStore,
StoreWoblinkStore StoreWoblinkStore,
StoreZixoStore
] ]
# }}} # }}}

View File

@ -6,21 +6,23 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url from calibre.gui2 import open_url
from calibre.gui2.store.amazon_plugin import AmazonKindleStore from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonDEKindleStore(AmazonKindleStore): class AmazonDEKindleStore(StorePlugin):
''' '''
For comments on the implementation, please see amazon_plugin.py For comments on the implementation, please see amazon_plugin.py
''' '''
search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
details_url = 'http://amazon.de/dp/'
drm_search_text = u'Gleichzeitige Verwendung von Geräten'
drm_free_text = u'Keine Einschränkung'
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'charhale0a-21'} aff_id = {'tag': 'charhale0a-21'}
store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de' store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
@ -32,3 +34,94 @@ class AmazonDEKindleStore(AmazonKindleStore):
'&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de' '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') % aff_id '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') % aff_id
open_url(QUrl(store_link)) open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
    '''
    Search the amazon.de Kindle store for ``query``.

    Generator: yields one SearchResult per Kindle book found on the
    first results page, stopping after ``max_results`` results.
    ``timeout`` is passed to the browser when fetching the page.
    '''
    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
    url = search_url + urllib.quote_plus(query)
    br = browser()

    remaining = max_results
    with closing(br.open(url, timeout=timeout)) as f:
        doc = html.fromstring(f.read())

        # Amazon serves two different result layouts; the presence of
        # the "shotgunMainResults" div tells them apart.
        is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
        if is_shot:
            # Horizontal grid of books.
            data_xpath = '//div[contains(@class, "result")]'
            format_xpath = './/div[@class="productTitle"]/text()'
            cover_xpath = './/div[@class="productTitle"]//img/@src'
        else:
            # Vertical list of books.
            data_xpath = '//div[@class="productData"]'
            format_xpath = './/span[@class="format"]/text()'
            cover_xpath = '../div[@class="productImage"]/a/img/@src'

        for data in doc.xpath(data_xpath):
            if remaining <= 0:
                break

            # Even though we are searching digital-text only, Amazon
            # will still put in results for non Kindle books (author
            # pages). So we need to explicitly check that the item is
            # a Kindle book and ignore it if it isn't.
            binding = ''.join(data.xpath(format_xpath))
            if 'kindle' not in binding.lower():
                continue

            # We must have an asin, otherwise we can't easily
            # reference the book later.
            title_links = data.xpath('.//div[@class="productTitle"]/a[1]')
            if not title_links:
                continue
            asin_href = title_links[0].get('href', '')
            asin_match = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
            if not asin_match:
                continue
            asin = asin_match.group('asin')

            cover_url = ''.join(data.xpath(cover_xpath))
            title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()'))
            price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))

            # In the grid layout the author is part of the format text;
            # in the list layout it has its own "ptBrand" span. Either
            # way it follows the word "von".
            if is_shot:
                author_text = binding
            else:
                author_text = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
            author = author_text.split(' von ')[-1]

            remaining -= 1

            result = SearchResult()
            result.cover_url = cover_url.strip()
            result.title = title.strip()
            result.author = author.strip()
            result.price = price.strip()
            result.detail_item = asin.strip()
            result.formats = 'Kindle'

            yield result
def get_details(self, search_result, timeout):
    '''
    Fetch the amazon.de product page for ``search_result.detail_item``
    and set ``search_result.drm`` from its device-usage entry.

    Always returns True.
    '''
    drm_search_text = u'Gleichzeitige Verwendung von Geräten'
    drm_free_text = u'Keine Einschränkung'
    url = 'http://amazon.de/dp/'

    # True when a bold label containing the device-usage text exists.
    has_usage_label = ('boolean(//div[@class="content"]//li/b[contains(text(), "' +
                       drm_search_text + '")])')
    # True when that same list item also contains the "no restriction" text.
    usage_is_unlimited = ('boolean(//div[@class="content"]//li[contains(., "' +
                          drm_free_text + '") and contains(b, "' +
                          drm_search_text + '")])')

    br = browser()
    with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
        idata = html.fromstring(nf.read())
        if not idata.xpath(has_usage_label):
            # No device-usage entry on the page at all.
            search_result.drm = SearchResult.DRM_LOCKED
        elif idata.xpath(usage_is_unlimited):
            search_result.drm = SearchResult.DRM_UNLOCKED
        else:
            search_result.drm = SearchResult.DRM_UNKNOWN

    return True

View File

@ -131,16 +131,22 @@ class AmazonKindleStore(StorePlugin):
# Amazon has two results pages. # Amazon has two results pages.
is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])') is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
# Horizontal grid of books. # Horizontal grid of books. Search "Paolo Bacigalupi"
if is_shot: if is_shot:
data_xpath = '//div[contains(@class, "result")]' data_xpath = '//div[contains(@class, "result")]'
format_xpath = './/div[@class="productTitle"]/text()' format_xpath = './/div[@class="productTitle"]//text()'
asin_xpath = './/div[@class="productTitle"]//a'
cover_xpath = './/div[@class="productTitle"]//img/@src' cover_xpath = './/div[@class="productTitle"]//img/@src'
# Vertical list of books. title_xpath = './/div[@class="productTitle"]/a//text()'
price_xpath = './/div[@class="newPrice"]/span/text()'
# Vertical list of books. Search "martin"
else: else:
data_xpath = '//div[@class="productData"]' data_xpath = '//div[contains(@class, "results")]//div[contains(@class, "result")]'
format_xpath = './/span[@class="format"]/text()' format_xpath = './/span[@class="binding"]//text()'
cover_xpath = '../div[@class="productImage"]/a/img/@src' asin_xpath = './/div[@class="image"]/a[1]'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/a[@class="title"]/text()'
price_xpath = './/span[@class="price"]/text()'
for data in doc.xpath(data_xpath): for data in doc.xpath(data_xpath):
if counter <= 0: if counter <= 0:
@ -157,7 +163,7 @@ class AmazonKindleStore(StorePlugin):
# We must have an asin otherwise we can't easily reference the # We must have an asin otherwise we can't easily reference the
# book later. # book later.
asin_href = None asin_href = None
asin_a = data.xpath('.//div[@class="productTitle"]/a[1]') asin_a = data.xpath(asin_xpath)
if asin_a: if asin_a:
asin_href = asin_a[0].get('href', '') asin_href = asin_a[0].get('href', '')
m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href) m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
@ -170,13 +176,13 @@ class AmazonKindleStore(StorePlugin):
cover_url = ''.join(data.xpath(cover_xpath)) cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()')) title = ''.join(data.xpath(title_xpath))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')) price = ''.join(data.xpath(price_xpath))
if is_shot: if is_shot:
author = format.split(' by ')[-1] author = format.split(' by ')[-1]
else: else:
author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()')) author = ''.join(data.xpath('.//span[@class="ptBrand"]/text()'))
author = author.split('by ')[-1] author = author.split('by ')[-1]
counter -= 1 counter -= 1

View File

@ -15,17 +15,14 @@ from PyQt4.Qt import QUrl
from calibre import browser from calibre import browser
from calibre.gui2 import open_url from calibre.gui2 import open_url
from calibre.gui2.store.amazon_plugin import AmazonKindleStore from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
class AmazonUKKindleStore(AmazonKindleStore): class AmazonUKKindleStore(StorePlugin):
''' '''
For comments on the implementation, please see amazon_plugin.py For comments on the implementation, please see amazon_plugin.py
''' '''
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
details_url = 'http://amazon.co.uk/dp/'
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'calcharles-21'} aff_id = {'tag': 'calcharles-21'}
store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id
@ -36,7 +33,8 @@ class AmazonUKKindleStore(AmazonKindleStore):
open_url(QUrl(store_link)) open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = self.search_url + urllib.quote_plus(query) search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + urllib.quote_plus(query)
br = browser() br = browser()
counter = max_results counter = max_results
@ -95,7 +93,9 @@ class AmazonUKKindleStore(AmazonKindleStore):
if search_result.drm: if search_result.drm:
return return
url = self.details_url url = 'http://amazon.co.uk/dp/'
drm_search_text = u'Simultaneous Device Usage'
drm_free_text = u'Unlimited'
br = browser() br = browser()
with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
@ -106,10 +106,10 @@ class AmazonUKKindleStore(AmazonKindleStore):
if is_kindle: if is_kindle:
search_result.formats = 'Kindle' search_result.formats = 'Kindle'
if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
self.drm_search_text + '")])'): drm_search_text + '")])'):
if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
self.drm_free_text + '") and contains(b, "' + drm_free_text + '") and contains(b, "' +
self.drm_search_text + '")])'): drm_search_text + '")])'):
search_result.drm = SearchResult.DRM_UNLOCKED search_result.drm = SearchResult.DRM_UNLOCKED
else: else:
search_result.drm = SearchResult.DRM_UNKNOWN search_result.drm = SearchResult.DRM_UNKNOWN

View File

@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
import random import random
import re import re
import urllib
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
@ -47,26 +46,26 @@ class BNStore(BasicStoreConfig, StorePlugin):
d.exec_() d.exec_()
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = 'http://productsearch.barnesandnoble.com/search/results.aspx?STORE=EBOOK&SZE=%s&WRD=' % max_results query = query.replace(' ', '-')
url += urllib.quote_plus(query) url = 'http://www.barnesandnoble.com/s/%s?store=ebook&sze=%s' % (query, max_results)
br = browser() br = browser()
counter = max_results counter = max_results
with closing(br.open(url, timeout=timeout)) as f: with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read()) doc = html.fromstring(f.read())
for data in doc.xpath('//ul[contains(@class, "wgt-search-results-display")]/li[contains(@class, "search-result-item") and contains(@class, "nook-result-item")]'): for data in doc.xpath('//ul[contains(@class, "result-set")]/li[contains(@class, "result")]'):
if counter <= 0: if counter <= 0:
break break
id = ''.join(data.xpath('.//div[contains(@class, "wgt-product-image-module")]/a/@href')) id = ''.join(data.xpath('.//div[contains(@class, "image")]/a/@href'))
if not id: if not id:
continue continue
cover_url = ''.join(data.xpath('.//div[contains(@class, "wgt-product-image-module")]/a/img/@src')) cover_url = ''.join(data.xpath('.//div[contains(@class, "image")]//img/@src'))
title = ''.join(data.xpath('.//span[@class="product-title"]/a/text()')) title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
author = ', '.join(data.xpath('.//span[@class="contributers-line"]/a/text()')) author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
price = ''.join(data.xpath('.//span[contains(@class, "onlinePriceValue2")]/text()')) price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[@class="subtle"]/text()'))
counter -= 1 counter -= 1
@ -74,7 +73,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
s.cover_url = cover_url s.cover_url = cover_url
s.title = title.strip() s.title = title.strip()
s.author = author.strip() s.author = author.strip()
s.price = price s.price = price.strip()
s.detail_item = id.strip() s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNKNOWN s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Nook' s.formats = 'Nook'

View File

@ -2,7 +2,8 @@ This is a list of stores that objected, declined
or asked not to be included in the store integration. or asked not to be included in the store integration.
* Borders (http://www.borders.com/) * Borders (http://www.borders.com/)
* WH Smith (http://www.whsmith.co.uk/) * Indigo (http://www.chapters.indigo.ca/)
Refused to permit signing up for the affiliate program
* Libreria Rizzoli (http://libreriarizzoli.corriere.it/). * Libreria Rizzoli (http://libreriarizzoli.corriere.it/).
No reply with two attempts over 2 weeks No reply with two attempts over 2 weeks
* WH Smith (http://www.whsmith.co.uk/)
Refused to permit signing up for the affiliate program

View File

@ -71,7 +71,7 @@ class NextoStore(BasicStoreConfig, StorePlugin):
author = '' author = ''
with closing(br.open('http://www.nexto.pl/' + id.strip(), timeout=timeout/4)) as nf: with closing(br.open('http://www.nexto.pl/' + id.strip(), timeout=timeout/4)) as nf:
idata = html.fromstring(nf.read()) idata = html.fromstring(nf.read())
author = ''.join(idata.xpath('//div[@class="basic_data"]/p[1]/b/a/text()')) author = ', '.join(idata.xpath('//div[@class="basic_data"]/p[1]/b/a/text()'))
counter -= 1 counter -= 1

View File

@ -82,6 +82,8 @@ class SearchDialog(QDialog, Ui_Dialog):
self.restore_state() self.restore_state()
def setup_store_checks(self): def setup_store_checks(self):
first_run = self.config.get('first_run', True)
# Add check boxes for each store so the user # Add check boxes for each store so the user
# can disable searching specific stores on a # can disable searching specific stores on a
# per search basis. # per search basis.
@ -98,7 +100,7 @@ class SearchDialog(QDialog, Ui_Dialog):
icon = QIcon(I('donate.png')) icon = QIcon(I('donate.png'))
for i, x in enumerate(sorted(self.gui.istores.keys(), key=lambda x: x.lower())): for i, x in enumerate(sorted(self.gui.istores.keys(), key=lambda x: x.lower())):
cbox = QCheckBox(x) cbox = QCheckBox(x)
cbox.setChecked(existing.get(x, False)) cbox.setChecked(existing.get(x, first_run))
store_list_layout.addWidget(cbox, i, 0, 1, 1) store_list_layout.addWidget(cbox, i, 0, 1, 1)
if self.gui.istores[x].base_plugin.affiliate: if self.gui.istores[x].base_plugin.affiliate:
iw = QLabel(self) iw = QLabel(self)
@ -109,6 +111,8 @@ class SearchDialog(QDialog, Ui_Dialog):
store_list_layout.setRowStretch(store_list_layout.rowCount(), 10) store_list_layout.setRowStretch(store_list_layout.rowCount(), 10)
self.store_list.setWidget(stores_check_widget) self.store_list.setWidget(stores_check_widget)
self.config['first_run'] = False
def build_adv_search(self): def build_adv_search(self):
adv = AdvSearchBuilderDialog(self) adv = AdvSearchBuilderDialog(self)
if adv.exec_() == QDialog.Accepted: if adv.exec_() == QDialog.Accepted:

View File

@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class ZixoStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for zixo.pl: opens the shop, searches it, and scrapes
    the available formats from a book's detail page.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store, or ``detail_item`` when one is given.

        Uses the system browser when ``external`` is true or the
        'open_external' config option is set; otherwise shows the
        page in a WebStoreDialog.
        '''
        url = 'http://zixo.pl/e_ksiazki/start/'

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Search zixo.pl for ``query``.

        Generator: yields up to ``max_results`` SearchResult objects,
        one per product on the results page that has a thumbnail link.
        '''
        url = 'http://zixo.pl/wyszukiwarka/?search=' + urllib.quote(query.encode('utf-8')) + '&product_type=0'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="productInline"]'):
                if counter <= 0:
                    break

                # The thumbnail link is the only reference to the book's
                # detail page; without it the result cannot be opened.
                detail_href = ''.join(data.xpath('.//a[@class="productThumb"]/@href'))
                if not detail_href:
                    continue

                cover_url = ''.join(data.xpath('.//a[@class="productThumb"]/img/@src'))
                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
                author = ''.join(data.xpath('.//div[@class="productDescription"]/span[1]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="priceList"]/span/text()'))
                # Show the price with a decimal comma; a plain string
                # replace is enough, no regular expression needed.
                price = price.replace('.', ',')

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                # Strip surrounding whitespace from the scraped text,
                # as is done for title and author.
                s.price = price.strip()
                s.detail_item = 'http://zixo.pl' + detail_href.strip()
                s.drm = SearchResult.DRM_LOCKED

                yield s

    def get_details(self, search_result, timeout):
        '''
        Fetch the detail page in ``search_result.detail_item`` and fill
        in ``search_result.formats``. Always returns True.
        '''
        br = browser()
        with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
            idata = html.fromstring(nf.read())
            formats = ''.join(idata.xpath('//ul[@class="prop"]/li[3]/text()'))
            # Drop any parenthesised note and use the short format name.
            formats = re.sub(r'\(.*\)', '', formats)
            formats = formats.replace('Zixo Reader', 'ZIXO')
            # Removing the note can leave stray whitespace behind.
            search_result.formats = formats.strip()
        return True