Remove rest of Amazon EU stores

Merge branch 'master' of https://github.com/cbhaley/calibre
This commit is contained in:
Kovid Goyal 2015-12-05 20:30:51 +05:30
commit ed30c16817
4 changed files with 51 additions and 454 deletions

View File

@ -1312,6 +1312,8 @@ class StoreAmazonFRKindleStore(StoreBase):
formats = ['KINDLE'] formats = ['KINDLE']
affiliate = True affiliate = True
# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave
# it here until then so users have a chance to see why the store is gone.
class StoreAmazonITKindleStore(StoreBase): class StoreAmazonITKindleStore(StoreBase):
name = 'Amazon IT Kindle' name = 'Amazon IT Kindle'
author = 'Charles Haley' author = 'Charles Haley'
@ -1322,6 +1324,8 @@ class StoreAmazonITKindleStore(StoreBase):
formats = ['KINDLE'] formats = ['KINDLE']
affiliate = True affiliate = True
# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave
# it here until then so users have a chance to see why the store is gone.
class StoreAmazonESKindleStore(StoreBase): class StoreAmazonESKindleStore(StoreBase):
name = 'Amazon ES Kindle' name = 'Amazon ES Kindle'
author = 'Charles Haley' author = 'Charles Haley'
@ -1332,6 +1336,8 @@ class StoreAmazonESKindleStore(StoreBase):
formats = ['KINDLE'] formats = ['KINDLE']
affiliate = True affiliate = True
# Remove this plugin definition in Jan 2016 because of Amazon demand. We leave
# it here until then so users have a chance to see why the store is gone.
class StoreAmazonUKKindleStore(StoreBase): class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle' name = 'Amazon UK Kindle'
author = 'Charles Haley' author = 'Charles Haley'

View File

@ -1,169 +1,34 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 8 # Needed for dynamic plugin loading store_version = 9 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from contextlib import closing
from lxml import html
from PyQt5.Qt import QUrl
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
class AmazonESKindleStore(StorePlugin): class AmazonESKindleStore(StorePlugin):
''' '''
For comments on the implementation, please see amazon_plugin.py Amazon forcibly closed the affiliate account, requesting that "all links
toward Amazon content be removed".
''' '''
aff_id = {'tag': 'charhale09-21'} def genesis(self):
store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&' StorePlugin.genesis(self)
'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790') from calibre.customize.ui import find_plugin
store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&' pi = find_plugin('Amazon ES Kindle')
'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s' pi.affiliate = False
'&linkCode=ur2&camp=3626&creative=24790')
search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'de '
and_word = ' y '
# ---- Copy from here to end
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
pass
store_link = self.store_link % self.aff_id
if detail_item:
self.aff_id['asin'] = detail_item
store_link = self.store_link_details % self.aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
format_xpath2 = ''
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[contains(@class, "productImage")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "s-result-list-parent-container")]'):
#print('new list form')
data_xpath = '//li[contains(@class, "s-result-item")]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//div[contains(@class, "a-spacing-none")]/a/span[contains(@class, "s-price")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//li[@class="s-result-item"]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//span[contains(@class, "s-price")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
if format_xpath2:
format_ = ''.join(data.xpath(format_xpath2))
if 'kindle' not in format_.lower():
# print(etree.tostring(data, pretty_print=True))
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath)[-1])
counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url.strip() s.title = 'Amazon demanded that this<br>store be permanently closed.'
s.title = title.strip() s.author = None
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s yield s
def get_details(self, search_result, timeout): def get_details(self, search_result, timeout):

View File

@ -1,169 +1,34 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 8 # Needed for dynamic plugin loading store_version = 9 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from contextlib import closing
from lxml import html
from PyQt5.Qt import QUrl
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
class AmazonITKindleStore(StorePlugin): class AmazonITKindleStore(StorePlugin):
''' '''
For comments on the implementation, please see amazon_plugin.py Amazon forcibly closed the affiliate account, requesting that "all links
toward Amazon content be removed".
''' '''
aff_id = {'tag': 'httpcharles07-21'} def genesis(self):
store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&' StorePlugin.genesis(self)
'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322') from calibre.customize.ui import find_plugin
store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&' pi = find_plugin('Amazon IT Kindle')
'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&' pi.affiliate = False
'linkCode=ur2&camp=3370&creative=23322')
search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'di '
and_word = ' e '
# ---- Copy from here to end
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
pass
store_link = self.store_link % self.aff_id
if detail_item:
self.aff_id['asin'] = detail_item
store_link = self.store_link_details % self.aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
format_xpath2 = ''
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[contains(@class, "productImage")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "s-result-list-parent-container")]'):
#print('new list form')
data_xpath = '//li[contains(@class, "s-result-item")]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//div[contains(@class, "a-spacing-none")]/a/span[contains(@class, "s-price")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//li[@class="s-result-item"]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//span[contains(@class, "s-price")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
if format_xpath2:
format_ = ''.join(data.xpath(format_xpath2))
if 'kindle' not in format_.lower():
# print(etree.tostring(data, pretty_print=True))
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath)[-1])
counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url.strip() s.title = 'Amazon demanded that this<br>store be permanently closed.'
s.title = title.strip() s.author = None
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s yield s
def get_details(self, search_result, timeout): def get_details(self, search_result, timeout):

View File

@ -1,176 +1,37 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 8 # Needed for dynamic plugin loading store_version = 9 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from contextlib import closing
from lxml import html
# from lxml import html
from PyQt5.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
class AmazonUKKindleStore(StorePlugin): class AmazonUKKindleStore(StorePlugin):
aff_id = {'tag': 'calcharles-21'}
store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=19450')
store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=6738')
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'by '
and_word = ' and '
# This code is copy/pasted from here to the other amazon EU plugins. Do not
# modify it in any other amazon EU plugin. Be sure to paste it into all
# other amazon EU plugins when modified.
# ---- Copy from here to end
''' '''
For comments on the implementation, please see amazon_plugin.py Amazon forcibly closed the affiliate account, requesting that "all links
toward Amazon content be removed".
''' '''
def genesis(self):
StorePlugin.genesis(self)
from calibre.customize.ui import find_plugin
pi = find_plugin('Amazon UK Kindle')
pi.affiliate = False
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
pass
store_link = self.store_link % self.aff_id
if detail_item:
self.aff_id['asin'] = detail_item
store_link = self.store_link_details % self.aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
format_xpath2 = ''
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[contains(@class, "productImage")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "s-result-list-parent-container")]'):
#print('new list form')
data_xpath = '//li[contains(@class, "s-result-item")]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//div[contains(@class, "a-spacing-none")]/a/span[contains(@class, "s-price")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//li[@class="s-result-item"]'
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
asin_xpath = '@data-asin'
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
author_xpath = ('.//div[contains(@class, "a-fixed-left-grid-col")]'
'/div/div/span//text()')
price_xpath = ('.//span[contains(@class, "s-price")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
if format_xpath2:
format_ = ''.join(data.xpath(format_xpath2))
if 'kindle' not in format_.lower():
# print(etree.tostring(data, pretty_print=True))
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath)[-1])
counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url.strip() s.title = 'Amazon demanded that this<br>store be permanently closed.'
s.title = title.strip() s.author = None
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s yield s
def get_details(self, search_result, timeout): def get_details(self, search_result, timeout):
pass pass