Get Books: Update the amazon, waterstones and libri.de plugins to account for website changes

This commit is contained in:
Kovid Goyal 2013-01-16 08:55:23 +05:30
commit 76582f2fe3
8 changed files with 402 additions and 46 deletions

View File

@ -1471,9 +1471,9 @@ class StoreLegimiStore(StoreBase):
affiliate = True affiliate = True
class StoreLibreDEStore(StoreBase): class StoreLibreDEStore(StoreBase):
name = 'Libri DE' name = 'ebook.de'
author = 'Charles Haley' author = 'Charles Haley'
description = u'Sicher Bücher, Hörbücher und Downloads online bestellen.' description = u'All Ihre Bücher immer dabei. Suchen, finden, kaufen: so einfach wie nie. ebook.de war libre.de'
actual_plugin = 'calibre.gui2.store.stores.libri_de_plugin:LibreDEStore' actual_plugin = 'calibre.gui2.store.stores.libri_de_plugin:LibreDEStore'
headquarters = 'DE' headquarters = 'DE'

View File

@ -7,9 +7,100 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore from contextlib import closing
from lxml import html
class AmazonDEKindleStore(AmazonUKKindleStore): from PyQt4.Qt import QUrl
from calibre.gui2.store import StorePlugin
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult
# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
class AmazonEUBase(StorePlugin):
    '''
    Shared implementation for the Amazon EU Kindle store plugins.

    The methods below read the following attributes from ``self``, which
    the concrete store class is expected to provide: ``aff_id``,
    ``store_link``, ``store_link_details``, ``search_url`` and
    ``author_article``.

    For comments on the implementation, please see amazon_plugin.py
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front, or the page of one book, via ``open_url``.

        :param parent: Not used by this implementation.
        :param detail_item: ASIN of the book to show. When falsy the store
            front (``store_link``) is opened instead.
        :param external: Not used by this implementation.
        '''
        if detail_item:
            # Build the substitution values on a copy: aff_id may be a dict
            # shared by every instance, so writing the asin into it would
            # leak one book's id into later calls.
            link_args = dict(self.aff_id, asin=detail_item)
            store_link = self.store_link_details % link_args
        else:
            store_link = self.store_link % self.aff_id
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
        '''
        Search the store and yield one ``SearchResult`` per Kindle book.

        :param query: Search text; it is escaped and appended to
            ``search_url``.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout passed to the browser when fetching the
            results page.
        '''
        # Non-ASCII characters are turned into \xNN escapes and then rewritten
        # as %NN. NOTE(review): characters that encode as \uNNNN are passed
        # through unchanged by this escaping -- confirm that is acceptable.
        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        # The whole page is read and parsed up front, so the response can be
        # closed before any result is handed to the caller.
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

        data_xpath = '//div[contains(@class, "prod")]'
        format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
        asin_xpath = '@name'
        cover_xpath = './/img[@class="productImage"]/@src'
        title_xpath = './/h3[@class="newaps"]/a//text()'
        author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
        price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'

        for data in doc.xpath(data_xpath):
            if counter <= 0:
                break

            # Even though we are searching digital-text only Amazon will still
            # put in results for non Kindle books (author pages). So we need
            # to explicitly check if the item is a Kindle book and ignore it
            # if it isn't.
            format_ = ''.join(data.xpath(format_xpath))
            if 'kindle' not in format_.lower():
                continue

            # We must have an asin otherwise we can't easily reference the
            # book later.
            asin = data.xpath(asin_xpath)
            if not asin:
                continue
            asin = asin[0]

            cover_url = ''.join(data.xpath(cover_xpath))
            title = ''.join(data.xpath(title_xpath))
            author = self._strip_author_article(''.join(data.xpath(author_xpath)))
            price = ''.join(data.xpath(price_xpath))

            counter -= 1

            s = SearchResult()
            s.cover_url = cover_url.strip()
            s.title = title.strip()
            s.author = author.strip()
            s.price = price.strip()
            s.detail_item = asin.strip()
            s.drm = SearchResult.DRM_UNKNOWN
            s.formats = 'Kindle'

            yield s

    def _strip_author_article(self, author):
        '''
        Return ``author`` without the text up to and including the first
        occurrence of ``author_article`` and without anything from the first
        " (" onwards.

        ``author`` is returned unchanged when ``author_article`` is missing
        or empty, or when it does not occur in ``author``.
        '''
        article = getattr(self, 'author_article', None)
        if not article:
            return author
        try:
            if article not in author:
                return author
            return author.split(article, 1)[1].split(' (')[0]
        except (TypeError, UnicodeError):
            # Clean-up is best effort only: an article that cannot be
            # compared with the scraped text must not abort the search.
            return author

    def get_details(self, search_result, timeout):
        '''
        Do nothing: no further details are fetched for a search result.

        :param search_result: Ignored.
        :param timeout: Ignored.
        '''
        pass
class AmazonDEKindleStore(AmazonEUBase):
''' '''
For comments on the implementation, please see amazon_plugin.py For comments on the implementation, please see amazon_plugin.py
''' '''

View File

@ -7,9 +7,99 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore from contextlib import closing
from lxml import html
class AmazonESKindleStore(AmazonUKKindleStore): from PyQt4.Qt import QUrl
from calibre.gui2.store import StorePlugin
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult
# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
class AmazonEUBase(StorePlugin):
    '''
    Shared implementation for the Amazon EU Kindle store plugins.

    The methods below read the following attributes from ``self``, which
    the concrete store class is expected to provide: ``aff_id``,
    ``store_link``, ``store_link_details``, ``search_url`` and
    ``author_article``.

    For comments on the implementation, please see amazon_plugin.py
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front, or the page of one book, via ``open_url``.

        :param parent: Not used by this implementation.
        :param detail_item: ASIN of the book to show. When falsy the store
            front (``store_link``) is opened instead.
        :param external: Not used by this implementation.
        '''
        if detail_item:
            # Build the substitution values on a copy: aff_id may be a dict
            # shared by every instance, so writing the asin into it would
            # leak one book's id into later calls.
            link_args = dict(self.aff_id, asin=detail_item)
            store_link = self.store_link_details % link_args
        else:
            store_link = self.store_link % self.aff_id
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
        '''
        Search the store and yield one ``SearchResult`` per Kindle book.

        :param query: Search text; it is escaped and appended to
            ``search_url``.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout passed to the browser when fetching the
            results page.
        '''
        # Non-ASCII characters are turned into \xNN escapes and then rewritten
        # as %NN. NOTE(review): characters that encode as \uNNNN are passed
        # through unchanged by this escaping -- confirm that is acceptable.
        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        # The whole page is read and parsed up front, so the response can be
        # closed before any result is handed to the caller.
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

        data_xpath = '//div[contains(@class, "prod")]'
        format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
        asin_xpath = '@name'
        cover_xpath = './/img[@class="productImage"]/@src'
        title_xpath = './/h3[@class="newaps"]/a//text()'
        author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
        price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'

        for data in doc.xpath(data_xpath):
            if counter <= 0:
                break

            # Even though we are searching digital-text only Amazon will still
            # put in results for non Kindle books (author pages). So we need
            # to explicitly check if the item is a Kindle book and ignore it
            # if it isn't.
            format_ = ''.join(data.xpath(format_xpath))
            if 'kindle' not in format_.lower():
                continue

            # We must have an asin otherwise we can't easily reference the
            # book later.
            asin = data.xpath(asin_xpath)
            if not asin:
                continue
            asin = asin[0]

            cover_url = ''.join(data.xpath(cover_xpath))
            title = ''.join(data.xpath(title_xpath))
            author = self._strip_author_article(''.join(data.xpath(author_xpath)))
            price = ''.join(data.xpath(price_xpath))

            counter -= 1

            s = SearchResult()
            s.cover_url = cover_url.strip()
            s.title = title.strip()
            s.author = author.strip()
            s.price = price.strip()
            s.detail_item = asin.strip()
            s.drm = SearchResult.DRM_UNKNOWN
            s.formats = 'Kindle'

            yield s

    def _strip_author_article(self, author):
        '''
        Return ``author`` without the text up to and including the first
        occurrence of ``author_article`` and without anything from the first
        " (" onwards.

        ``author`` is returned unchanged when ``author_article`` is missing
        or empty, or when it does not occur in ``author``.
        '''
        article = getattr(self, 'author_article', None)
        if not article:
            return author
        try:
            if article not in author:
                return author
            return author.split(article, 1)[1].split(' (')[0]
        except (TypeError, UnicodeError):
            # Clean-up is best effort only: an article that cannot be
            # compared with the scraped text must not abort the search.
            return author

    def get_details(self, search_result, timeout):
        '''
        Do nothing: no further details are fetched for a search result.

        :param search_result: Ignored.
        :param timeout: Ignored.
        '''
        pass
class AmazonESKindleStore(AmazonEUBase):
''' '''
For comments on the implementation, please see amazon_plugin.py For comments on the implementation, please see amazon_plugin.py
''' '''

View File

@ -8,9 +8,100 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore from contextlib import closing
from lxml import html
class AmazonFRKindleStore(AmazonUKKindleStore): from PyQt4.Qt import QUrl
from calibre.gui2.store import StorePlugin
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult
# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
class AmazonEUBase(StorePlugin):
    '''
    Shared implementation for the Amazon EU Kindle store plugins.

    The methods below read the following attributes from ``self``, which
    the concrete store class is expected to provide: ``aff_id``,
    ``store_link``, ``store_link_details``, ``search_url`` and
    ``author_article``.

    For comments on the implementation, please see amazon_plugin.py
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front, or the page of one book, via ``open_url``.

        :param parent: Not used by this implementation.
        :param detail_item: ASIN of the book to show. When falsy the store
            front (``store_link``) is opened instead.
        :param external: Not used by this implementation.
        '''
        if detail_item:
            # Build the substitution values on a copy: aff_id may be a dict
            # shared by every instance, so writing the asin into it would
            # leak one book's id into later calls.
            link_args = dict(self.aff_id, asin=detail_item)
            store_link = self.store_link_details % link_args
        else:
            store_link = self.store_link % self.aff_id
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
        '''
        Search the store and yield one ``SearchResult`` per Kindle book.

        :param query: Search text; it is escaped and appended to
            ``search_url``.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout passed to the browser when fetching the
            results page.
        '''
        # Non-ASCII characters are turned into \xNN escapes and then rewritten
        # as %NN. NOTE(review): characters that encode as \uNNNN are passed
        # through unchanged by this escaping -- confirm that is acceptable.
        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        # The whole page is read and parsed up front, so the response can be
        # closed before any result is handed to the caller.
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

        data_xpath = '//div[contains(@class, "prod")]'
        format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
        asin_xpath = '@name'
        cover_xpath = './/img[@class="productImage"]/@src'
        title_xpath = './/h3[@class="newaps"]/a//text()'
        author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
        price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'

        for data in doc.xpath(data_xpath):
            if counter <= 0:
                break

            # Even though we are searching digital-text only Amazon will still
            # put in results for non Kindle books (author pages). So we need
            # to explicitly check if the item is a Kindle book and ignore it
            # if it isn't.
            format_ = ''.join(data.xpath(format_xpath))
            if 'kindle' not in format_.lower():
                continue

            # We must have an asin otherwise we can't easily reference the
            # book later.
            asin = data.xpath(asin_xpath)
            if not asin:
                continue
            asin = asin[0]

            cover_url = ''.join(data.xpath(cover_xpath))
            title = ''.join(data.xpath(title_xpath))
            author = self._strip_author_article(''.join(data.xpath(author_xpath)))
            price = ''.join(data.xpath(price_xpath))

            counter -= 1

            s = SearchResult()
            s.cover_url = cover_url.strip()
            s.title = title.strip()
            s.author = author.strip()
            s.price = price.strip()
            s.detail_item = asin.strip()
            s.drm = SearchResult.DRM_UNKNOWN
            s.formats = 'Kindle'

            yield s

    def _strip_author_article(self, author):
        '''
        Return ``author`` without the text up to and including the first
        occurrence of ``author_article`` and without anything from the first
        " (" onwards.

        ``author`` is returned unchanged when ``author_article`` is missing
        or empty, or when it does not occur in ``author``.
        '''
        article = getattr(self, 'author_article', None)
        if not article:
            return author
        try:
            if article not in author:
                return author
            return author.split(article, 1)[1].split(' (')[0]
        except (TypeError, UnicodeError):
            # Clean-up is best effort only: an article that cannot be
            # compared with the scraped text must not abort the search.
            return author

    def get_details(self, search_result, timeout):
        '''
        Do nothing: no further details are fetched for a search result.

        :param search_result: Ignored.
        :param timeout: Ignored.
        '''
        pass
class AmazonFRKindleStore(AmazonEUBase):
''' '''
For comments on the implementation, please see amazon_plugin.py For comments on the implementation, please see amazon_plugin.py
''' '''

View File

@ -7,9 +7,100 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore from contextlib import closing
from lxml import html
class AmazonITKindleStore(AmazonUKKindleStore): from PyQt4.Qt import QUrl
from calibre.gui2.store import StorePlugin
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult
# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
class AmazonEUBase(StorePlugin):
    '''
    Shared implementation for the Amazon EU Kindle store plugins.

    The methods below read the following attributes from ``self``, which
    the concrete store class is expected to provide: ``aff_id``,
    ``store_link``, ``store_link_details``, ``search_url`` and
    ``author_article``.

    For comments on the implementation, please see amazon_plugin.py
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front, or the page of one book, via ``open_url``.

        :param parent: Not used by this implementation.
        :param detail_item: ASIN of the book to show. When falsy the store
            front (``store_link``) is opened instead.
        :param external: Not used by this implementation.
        '''
        if detail_item:
            # Build the substitution values on a copy: aff_id may be a dict
            # shared by every instance, so writing the asin into it would
            # leak one book's id into later calls.
            link_args = dict(self.aff_id, asin=detail_item)
            store_link = self.store_link_details % link_args
        else:
            store_link = self.store_link % self.aff_id
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
        '''
        Search the store and yield one ``SearchResult`` per Kindle book.

        :param query: Search text; it is escaped and appended to
            ``search_url``.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout passed to the browser when fetching the
            results page.
        '''
        # Non-ASCII characters are turned into \xNN escapes and then rewritten
        # as %NN. NOTE(review): characters that encode as \uNNNN are passed
        # through unchanged by this escaping -- confirm that is acceptable.
        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        # The whole page is read and parsed up front, so the response can be
        # closed before any result is handed to the caller.
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

        data_xpath = '//div[contains(@class, "prod")]'
        format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
        asin_xpath = '@name'
        cover_xpath = './/img[@class="productImage"]/@src'
        title_xpath = './/h3[@class="newaps"]/a//text()'
        author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
        price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'

        for data in doc.xpath(data_xpath):
            if counter <= 0:
                break

            # Even though we are searching digital-text only Amazon will still
            # put in results for non Kindle books (author pages). So we need
            # to explicitly check if the item is a Kindle book and ignore it
            # if it isn't.
            format_ = ''.join(data.xpath(format_xpath))
            if 'kindle' not in format_.lower():
                continue

            # We must have an asin otherwise we can't easily reference the
            # book later.
            asin = data.xpath(asin_xpath)
            if not asin:
                continue
            asin = asin[0]

            cover_url = ''.join(data.xpath(cover_xpath))
            title = ''.join(data.xpath(title_xpath))
            author = self._strip_author_article(''.join(data.xpath(author_xpath)))
            price = ''.join(data.xpath(price_xpath))

            counter -= 1

            s = SearchResult()
            s.cover_url = cover_url.strip()
            s.title = title.strip()
            s.author = author.strip()
            s.price = price.strip()
            s.detail_item = asin.strip()
            s.drm = SearchResult.DRM_UNKNOWN
            s.formats = 'Kindle'

            yield s

    def _strip_author_article(self, author):
        '''
        Return ``author`` without the text up to and including the first
        occurrence of ``author_article`` and without anything from the first
        " (" onwards.

        ``author`` is returned unchanged when ``author_article`` is missing
        or empty, or when it does not occur in ``author``.
        '''
        article = getattr(self, 'author_article', None)
        if not article:
            return author
        try:
            if article not in author:
                return author
            return author.split(article, 1)[1].split(' (')[0]
        except (TypeError, UnicodeError):
            # Clean-up is best effort only: an article that cannot be
            # compared with the scraped text must not abort the search.
            return author

    def get_details(self, search_result, timeout):
        '''
        Do nothing: no further details are fetched for a search result.

        :param search_result: Ignored.
        :param timeout: Ignored.
        '''
        pass
class AmazonITKindleStore(AmazonEUBase):
''' '''
For comments on the implementation, please see amazon_plugin.py For comments on the implementation, please see amazon_plugin.py
''' '''

View File

@ -7,8 +7,6 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import random
import re
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
@ -131,7 +129,7 @@ class AmazonKindleStore(StorePlugin):
data_xpath = '//div[contains(@class, "prod")]' data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()' format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = './/div[@class="image"]/a[1]' asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src' cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()' title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
@ -151,15 +149,9 @@ class AmazonKindleStore(StorePlugin):
# We must have an asin otherwise we can't easily reference the # We must have an asin otherwise we can't easily reference the
# book later. # book later.
asin_href = None asin = data.xpath(asin_xpath)
asin_a = data.xpath(asin_xpath) if asin:
if asin_a: asin = asin[0]
asin_href = asin_a[0].get('href', '')
m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
if m:
asin = m.group('asin')
else:
continue
else: else:
continue continue

View File

@ -7,8 +7,6 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
@ -19,19 +17,12 @@ from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
class AmazonUKKindleStore(StorePlugin):
aff_id = {'tag': 'calcharles-21'}
store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=19450')
store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=6738')
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'by ' # This class is copy/pasted from amason_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
class AmazonEUBase(StorePlugin):
''' '''
For comments on the implementation, please see amazon_plugin.py For comments on the implementation, please see amazon_plugin.py
''' '''
@ -54,7 +45,7 @@ class AmazonUKKindleStore(StorePlugin):
data_xpath = '//div[contains(@class, "prod")]' data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()' format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = './/div[@class="image"]/a[1]' asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src' cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()' title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
@ -74,15 +65,9 @@ class AmazonUKKindleStore(StorePlugin):
# We must have an asin otherwise we can't easily reference the # We must have an asin otherwise we can't easily reference the
# book later. # book later.
asin_href = None asin = data.xpath(asin_xpath)
asin_a = data.xpath(asin_xpath) if asin:
if asin_a: asin = asin[0]
asin_href = asin_a[0].get('href', '')
m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
if m:
asin = m.group('asin')
else:
continue
else: else:
continue continue
@ -113,3 +98,17 @@ class AmazonUKKindleStore(StorePlugin):
def get_details(self, search_result, timeout): def get_details(self, search_result, timeout):
pass pass
# Amazon UK Kindle store: supplies the UK-specific constants on top of
# AmazonEUBase.
class AmazonUKKindleStore(AmazonEUBase):
# Values substituted into the link templates below; 'tag' fills %(tag)s.
aff_id = {'tag': 'calcharles-21'}
# Template for the store front link; takes %(tag)s.
store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=19450')
# Template for the link to a single book; takes %(asin)s and %(tag)s.
store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=6738')
# Search URL prefix. NOTE(review): presumably the escaped query is appended
# to this string rather than %-formatted into it, which is why the literal
# %3D is safe here -- confirm against AmazonEUBase.search.
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
# Word that precedes the author name in the scraped author text.
# NOTE(review): presumably consumed by AmazonEUBase.search -- confirm.
author_article = 'by '

View File

@ -41,7 +41,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
d.exec_() d.exec_()
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = 'http://www.waterstones.com/waterstonesweb/advancedSearch.do?buttonClicked=1&format=3757&bookkeywords=' + urllib2.quote(query) url = 'http://www.waterstones.com/waterstonesweb/simpleSearch.do?simpleSearchString=ebook+' + urllib2.quote(query)
br = browser() br = browser()
@ -56,6 +56,8 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
if not id: if not id:
continue continue
cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src')) cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src'))
if not cover_url.startswith("http"):
cover_url = 'http://www.waterstones.com' + cover_url
title = ''.join(data.xpath('./div/div/h2/a/text()')) title = ''.join(data.xpath('./div/div/h2/a/text()'))
author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()')) author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()'))
price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceRed2"]/text()')) price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceRed2"]/text()'))