"Close" Amazon.FR and Amazon.DE. Amazon forcibly closed these two affiliate accounts, demanding that "all links toward Amazon content be removed".

This commit is contained in:
Charles Haley 2015-12-05 14:56:12 +01:00
parent 26cff461eb
commit 80b3a8ae41
3 changed files with 31 additions and 295 deletions

View File

@ -1288,6 +1288,8 @@ class StoreAmazonCAKindleStore(StoreBase):
formats = ['KINDLE'] formats = ['KINDLE']
# affiliate = True # affiliate = True
# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave
# it here until then so users have a chance to see why the store is gone.
class StoreAmazonDEKindleStore(StoreBase): class StoreAmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle' name = 'Amazon DE Kindle'
author = 'Charles Haley' author = 'Charles Haley'
@ -1298,6 +1300,8 @@ class StoreAmazonDEKindleStore(StoreBase):
formats = ['KINDLE'] formats = ['KINDLE']
affiliate = True affiliate = True
# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave
# it here until then so users have a chance to see why the store is gone.
class StoreAmazonFRKindleStore(StoreBase): class StoreAmazonFRKindleStore(StoreBase):
name = 'Amazon FR Kindle' name = 'Amazon FR Kindle'
author = 'Charles Haley' author = 'Charles Haley'

View File

@ -1,170 +1,34 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 8 # Needed for dynamic plugin loading store_version = 9 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from contextlib import closing
from lxml import html
from PyQt5.Qt import QUrl
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
class AmazonDEKindleStore(StorePlugin): class AmazonDEKindleStore(StorePlugin):
''' '''
For comments on the implementation, please see amazon_plugin.py Amazon forcibly closed the affiliate account, requesting that "all links
toward Amazon content be removed".
''' '''
aff_id = {'tag': 'charhale0a-21'} def genesis(self):
store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de' StorePlugin.genesis(self)
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454' from calibre.customize.ui import find_plugin
'&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') pi = find_plugin('Amazon DE Kindle')
store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8' pi.affiliate = False
'&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'von '
and_word = ' und '
# ---- Copy from here to end
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
pass
store_link = self.store_link % self.aff_id
if detail_item:
self.aff_id['asin'] = detail_item
store_link = self.store_link_details % self.aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
format_xpath2 = ''
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[contains(@class, "productImage")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "s-result-list-parent-container")]'):
#print('new list form')
data_xpath = '//li[contains(@class, "s-result-item")]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//div[contains(@class, "a-spacing-none")]/a/span[contains(@class, "s-price")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//li[@class="s-result-item"]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//span[contains(@class, "s-price")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
if format_xpath2:
format_ = ''.join(data.xpath(format_xpath2))
if 'kindle' not in format_.lower():
# print(etree.tostring(data, pretty_print=True))
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath)[-1])
counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url.strip() s.title = 'Amazon demanded that this<br>store be permanently closed.'
s.title = title.strip() s.author = None
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s yield s
def get_details(self, search_result, timeout): def get_details(self, search_result, timeout):

View File

@ -1,166 +1,34 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 8 # Needed for dynamic plugin loading store_version = 9 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from contextlib import closing
from lxml import html
from PyQt5.Qt import QUrl
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
class AmazonFRKindleStore(StorePlugin): class AmazonFRKindleStore(StorePlugin):
''' '''
For comments on the implementation, please see amazon_plugin.py Amazon forcibly closed the affiliate account, requesting that "all links
toward Amazon content be removed".
''' '''
aff_id = {'tag': 'charhale-21'} def genesis(self):
store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id StorePlugin.genesis(self)
store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' from calibre.customize.ui import find_plugin
search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords=' pi = find_plugin('Amazon FR Kindle')
pi.affiliate = False
author_article = 'de '
and_word = ' et '
# ---- Copy from here to end
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
pass
store_link = self.store_link % self.aff_id
if detail_item:
self.aff_id['asin'] = detail_item
store_link = self.store_link_details % self.aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
format_xpath2 = ''
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[contains(@class, "productImage")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "s-result-list-parent-container")]'):
#print('new list form')
data_xpath = '//li[contains(@class, "s-result-item")]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//div[contains(@class, "a-spacing-none")]/a/span[contains(@class, "s-price")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//li[@class="s-result-item"]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//span[contains(@class, "s-price")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
if format_xpath2:
format_ = ''.join(data.xpath(format_xpath2))
if 'kindle' not in format_.lower():
# print(etree.tostring(data, pretty_print=True))
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath)[-1])
counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url.strip() s.title = 'Amazon demanded that this<br>store be permanently closed.'
s.title = title.strip() s.author = None
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s yield s
def get_details(self, search_result, timeout): def get_details(self, search_result, timeout):