Update the Get Books Amazon EU store plugins for website changes.
This commit is contained in:
Kovid Goyal 2013-07-12 13:48:23 +05:30
commit abf3bed75b
5 changed files with 256 additions and 76 deletions

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 3 # Needed for dynamic plugin loading
store_version = 4 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -52,25 +52,61 @@ class AmazonDEKindleStore(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
#print(url)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
for data in doc.xpath(data_xpath):
if counter <= 0:
@ -120,4 +156,3 @@ class AmazonDEKindleStore(StorePlugin):
def get_details(self, search_result, timeout):
pass

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 3 # Needed for dynamic plugin loading
store_version = 4 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -51,25 +51,61 @@ class AmazonESKindleStore(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
#print(url)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
for data in doc.xpath(data_xpath):
if counter <= 0:

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 3 # Needed for dynamic plugin loading
store_version = 4 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -48,25 +48,61 @@ class AmazonFRKindleStore(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
#print(url)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
for data in doc.xpath(data_xpath):
if counter <= 0:

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 3 # Needed for dynamic plugin loading
store_version = 4 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -51,25 +51,61 @@ class AmazonITKindleStore(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
#print(url)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
for data in doc.xpath(data_xpath):
if counter <= 0:
@ -119,3 +155,4 @@ class AmazonITKindleStore(StorePlugin):
def get_details(self, search_result, timeout):
pass

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 3 # Needed for dynamic plugin loading
store_version = 4 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -55,25 +55,61 @@ class AmazonUKKindleStore(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
#print(url)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
allText = f.read()
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
#print('grid form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
#print('ilo form')
data_xpath = '//li[(@class="ilo")]'
format_xpath = (
'.//ul[contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
#print('list form')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = (
'.//ul[contains(@class, "rsltL")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
else:
# URK -- whats this?
print('unknown result table form for Amazon EU search')
#with open("c:/amazon_search_results.html", "w") as out:
# out.write(allText)
return
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
for data in doc.xpath(data_xpath):
if counter <= 0: