Merge changes from lp:~cbhaley/calibre/charles_store.

This commit is contained in:
John Schember 2012-11-06 18:53:05 -05:00
commit 07f288dd16
6 changed files with 98 additions and 364 deletions

View File

@ -6,102 +6,19 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonDEKindleStore(StorePlugin):
class AmazonDEKindleStore(AmazonUKKindleStore):
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'charhale0a-21'}
store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
'&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') % aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
'&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
'&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') % aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
if author.startswith('von '):
author = author[4:]
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.formats = 'Kindle'
yield s
def get_details(self, search_result, timeout):
drm_search_text = u'Gleichzeitige Verwendung von Geräten'
drm_free_text = u'Keine Einschränkung'
url = 'http://amazon.de/dp/'
br = browser()
with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
drm_search_text + '")])'):
if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
drm_free_text + '") and contains(b, "' +
drm_search_text + '")])'):
search_result.drm = SearchResult.DRM_UNLOCKED
else:
search_result.drm = SearchResult.DRM_UNKNOWN
else:
search_result.drm = SearchResult.DRM_LOCKED
return True

View File

@ -6,78 +6,17 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonESKindleStore(StorePlugin):
class AmazonESKindleStore(AmazonUKKindleStore):
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'charhale09-21'}
store_link = 'http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790' % aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = 'http://www.amazon.es/gp/redirect.html?ie=UTF8&location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3626&creative=24790' % aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
'&linkCode=ur2&camp=3626&creative=24790')
search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
if author.startswith('de '):
author = author[3:]
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.formats = 'Kindle'
s.drm = SearchResult.DRM_UNKNOWN
yield s

View File

@ -6,79 +6,16 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from lxml import html
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonFRKindleStore(StorePlugin):
class AmazonFRKindleStore(AmazonUKKindleStore):
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'charhale-21'}
store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
if author.startswith('de '):
author = author[3:]
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.formats = 'Kindle'
s.drm = SearchResult.DRM_UNKNOWN
yield s

View File

@ -6,78 +6,17 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonITKindleStore(StorePlugin):
class AmazonITKindleStore(AmazonUKKindleStore):
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'httpcharles07-21'}
store_link = 'http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322' % aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = 'http://www.amazon.it/gp/redirect.html?ie=UTF8&location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3370&creative=23322' % aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=3370&creative=23322')
search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
if author.startswith('di '):
author = author[3:]
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.formats = 'Kindle'
s.drm = SearchResult.DRM_UNKNOWN
yield s

View File

@ -6,8 +6,9 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
import re
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
@ -18,57 +19,80 @@ from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonUKKindleStore(StorePlugin):
aff_id = {'tag': 'calcharles-21'}
store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=19450')
store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=6738')
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'calcharles-21'}
store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id
store_link = self.store_link % self.aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
self.aff_id['asin'] = detail_item
store_link = self.store_link_details % self.aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = './/div[@class="image"]/a[1]'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
asin_href = None
asin_a = data.xpath(asin_xpath)
if asin_a:
asin_href = asin_a[0].get('href', '')
m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
if m:
asin = m.group('asin')
else:
continue
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
title = ''.join(data.xpath(title_xpath))
author = ''.join(data.xpath(author_xpath))
try:
author = author.split('by ', 1)[1].split(" (")[0]
except:
pass
author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
if author.startswith('by '):
author = author[3:]
price = ''.join(data.xpath(price_xpath))
counter -= 1
@ -78,37 +102,10 @@ class AmazonUKKindleStore(StorePlugin):
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s
def get_details(self, search_result, timeout):
# We might already have been called.
if search_result.drm:
return
url = 'http://amazon.co.uk/dp/'
drm_search_text = u'Simultaneous Device Usage'
drm_free_text = u'Unlimited'
br = browser()
with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
if not search_result.author:
search_result.author = ''.join(idata.xpath('//div[@class="buying" and contains(., "Author")]/a/text()'))
is_kindle = idata.xpath('boolean(//div[@class="buying"]/h1/span/span[contains(text(), "Kindle Edition")])')
if is_kindle:
search_result.formats = 'Kindle'
if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
drm_search_text + '")])'):
if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
drm_free_text + '") and contains(b, "' +
drm_search_text + '")])'):
search_result.drm = SearchResult.DRM_UNLOCKED
else:
search_result.drm = SearchResult.DRM_UNKNOWN
else:
search_result.drm = SearchResult.DRM_LOCKED
return True
pass

View File

@ -25,7 +25,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://ad.zanox.com/ppc/?18817073C15644254T'
url_details = ('http://ad.zanox.com/ppc/?18817073C15644254T&ULP=[['
'http://www.libri.de/shop/action/productDetails?artiId={0}]]')
'http://www.ebook.de/shop/action/productDetails?artiId={0}]]')
if external or self.config.get('open_external', False):
if detail_item:
@ -41,33 +41,38 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = ('http://www.libri.de/shop/action/quickSearch?facetNodeId=6'
'&mainsearchSubmit=Los!&searchString=' + urllib2.quote(query))
url = ('http://www.ebook.de/de/pathSearch?nav=52122&searchString='
+ urllib2.quote(query))
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//div[contains(@class, "item")]'):
for data in doc.xpath('//div[contains(@class, "articlecontainer")]'):
if counter <= 0:
break
details = data.xpath('./div[@class="beschreibungContainer"]')
details = data.xpath('./div[@class="articleinfobox"]')
if not details:
continue
details = details[0]
id = ''.join(details.xpath('./div[@class="text"]/a/@name')).strip()
if not id:
id_ = ''.join(details.xpath('./a/@name')).strip()
if not id_:
continue
cover_url = ''.join(details.xpath('.//div[@class="coverImg"]/a/img/@src'))
title = ''.join(details.xpath('./div[@class="text"]/span[@class="titel"]/a/text()')).strip()
author = ''.join(details.xpath('./div[@class="text"]/span[@class="author"]/text()')).strip()
title = ''.join(details.xpath('.//a[@class="su1_c_l_titel"]/text()')).strip()
author = ''.join(details.xpath('.//div[@class="author"]/text()')).strip()
if author.startswith('von'):
author = author[4:]
pdf = details.xpath(
'boolean(.//span[@class="format" and contains(text(), "pdf")]/text())')
'boolean(.//span[@class="bindername" and contains(text(), "pdf")]/text())')
epub = details.xpath(
'boolean(.//span[@class="format" and contains(text(), "epub")]/text())')
'boolean(.//span[@class="bindername" and contains(text(), "epub")]/text())')
mobi = details.xpath(
'boolean(.//span[@class="format" and contains(text(), "mobipocket")]/text())')
'boolean(.//span[@class="bindername" and contains(text(), "mobipocket")]/text())')
cover_url = ''.join(data.xpath('.//div[@class="coverImg"]/a/img/@src'))
price = ''.join(data.xpath('.//span[@class="preis"]/text()')).replace('*', '').strip()
counter -= 1
@ -78,7 +83,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
s.author = author.strip()
s.price = price
s.drm = SearchResult.DRM_UNKNOWN
s.detail_item = id
s.detail_item = id_
formats = []
if epub:
formats.append('ePub')