mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Recover from yet another Amazon EU website change. This time Amazon introduced a new search-result table form with a very different structure. The sites switch between the form types (grid, ilresults, list) on a seemingly random basis, so the layout is detected on each search.
This commit is contained in:
parent
433dcea35a
commit
89f712dc53
@ -52,25 +52,61 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- whats this?
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
@ -120,4 +156,3 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
def get_details(self, search_result, timeout):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -51,25 +51,61 @@ class AmazonESKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- whats this?
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
|
@ -48,25 +48,61 @@ class AmazonFRKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- whats this?
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
|
@ -51,25 +51,61 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- whats this?
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
@ -119,3 +155,4 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
def get_details(self, search_result, timeout):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -55,25 +55,61 @@ class AmazonUKKindleStore(StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
|
||||||
|
#print(url)
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
|
allText = f.read()
|
||||||
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
|
#print('grid form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
|
||||||
|
#print('ilo form')
|
||||||
|
data_xpath = '//li[(@class="ilo")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
# Results can be in a grid (table) or a column
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
|
#print('list form')
|
||||||
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
|
format_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
||||||
|
asin_xpath = '@name'
|
||||||
|
cover_xpath = './/img[@class="productImage"]/@src'
|
||||||
|
title_xpath = './/h3[@class="newaps"]/a//text()'
|
||||||
|
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
||||||
|
price_xpath = (
|
||||||
|
'.//ul[contains(@class, "rsltL")]'
|
||||||
|
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
||||||
|
else:
|
||||||
|
# URK -- whats this?
|
||||||
|
print('unknown result table form for Amazon EU search')
|
||||||
|
#with open("c:/amazon_search_results.html", "w") as out:
|
||||||
|
# out.write(allText)
|
||||||
|
return
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
format_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
|
|
||||||
asin_xpath = '@name'
|
|
||||||
cover_xpath = './/img[@class="productImage"]/@src'
|
|
||||||
title_xpath = './/h3[@class="newaps"]/a//text()'
|
|
||||||
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
|
|
||||||
# Results can be in a grid (table) or a column
|
|
||||||
price_xpath = (
|
|
||||||
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
|
|
||||||
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
|
|
||||||
|
|
||||||
for data in doc.xpath(data_xpath):
|
for data in doc.xpath(data_xpath):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user