mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Merge branch 'master' of https://github.com/cbhaley/calibre
Update the Get Books Amazon EU store plugins to handle the changed Amazon search results page layout.
This commit is contained in:
commit
abf3bed75b
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 3 # Needed for dynamic plugin loading
|
store_version = 4 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -52,25 +52,61 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- what's this? The page matched none of the known result layouts (grid, ilresults, list).
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
@ -120,4 +156,3 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
def get_details(self, search_result, timeout):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 3 # Needed for dynamic plugin loading
|
store_version = 4 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -51,25 +51,61 @@ class AmazonESKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- what's this? The page matched none of the known result layouts (grid, ilresults, list).
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 3 # Needed for dynamic plugin loading
|
store_version = 4 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -48,25 +48,61 @@ class AmazonFRKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- what's this? The page matched none of the known result layouts (grid, ilresults, list).
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 3 # Needed for dynamic plugin loading
|
store_version = 4 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -51,25 +51,61 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- what's this? The page matched none of the known result layouts (grid, ilresults, list).
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
@ -119,3 +155,4 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
def get_details(self, search_result, timeout):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 3 # Needed for dynamic plugin loading
|
store_version = 4 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -55,25 +55,61 @@ class AmazonUKKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- what's this? The page matched none of the known result layouts (grid, ilresults, list).
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user