Handle column and grid layouts in amazon EU stores

This commit is contained in:
Charles Haley 2013-04-01 13:54:44 +02:00
parent de0c864159
commit 89a5375f57
5 changed files with 265 additions and 290 deletions

View File

@ -7,7 +7,7 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re, time
import re
from contextlib import closing
from lxml import html
@ -29,9 +29,6 @@ class AmazonEUBase(StorePlugin):
For comments on the implementation, please see amazon_plugin.py
'''
MAX_SEARCH_ATTEMPTS = 5
SLEEP_BETWEEN_ATTEMPTS = 3
def open(self, parent=None, detail_item=None, external=False):
store_link = self.store_link % self.aff_id
@ -42,73 +39,71 @@ class AmazonEUBase(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
loops = 0
while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
br = browser()
if loops > 0:
print ("Retry getbooks search", self.__class__.__name__, counter,
max_results, loops)
time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
loops += 1
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath))
price = ''.join(data.xpath(price_xpath))
counter -= 1
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s
yield s
def get_details(self, search_result, timeout):
pass

View File

@ -7,7 +7,7 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re, time
import re
from contextlib import closing
from lxml import html
@ -28,9 +28,6 @@ class AmazonEUBase(StorePlugin):
For comments on the implementation, please see amazon_plugin.py
'''
MAX_SEARCH_ATTEMPTS = 5
SLEEP_BETWEEN_ATTEMPTS = 3
def open(self, parent=None, detail_item=None, external=False):
store_link = self.store_link % self.aff_id
@ -41,73 +38,71 @@ class AmazonEUBase(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
loops = 0
while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
br = browser()
if loops > 0:
print ("Retry getbooks search", self.__class__.__name__, counter,
max_results, loops)
time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
loops += 1
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath))
price = ''.join(data.xpath(price_xpath))
counter -= 1
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s
yield s
def get_details(self, search_result, timeout):
pass

View File

@ -7,7 +7,7 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re, time
import re
from contextlib import closing
from lxml import html
@ -29,9 +29,6 @@ class AmazonEUBase(StorePlugin):
For comments on the implementation, please see amazon_plugin.py
'''
MAX_SEARCH_ATTEMPTS = 5
SLEEP_BETWEEN_ATTEMPTS = 3
def open(self, parent=None, detail_item=None, external=False):
store_link = self.store_link % self.aff_id
@ -42,73 +39,71 @@ class AmazonEUBase(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
loops = 0
while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
br = browser()
if loops > 0:
print ("Retry getbooks search", self.__class__.__name__, counter,
max_results, loops)
time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
loops += 1
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath))
price = ''.join(data.xpath(price_xpath))
counter -= 1
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s
yield s
def get_details(self, search_result, timeout):
pass

View File

@ -7,7 +7,7 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re, time
import re
from contextlib import closing
from lxml import html
@ -28,9 +28,6 @@ class AmazonEUBase(StorePlugin):
For comments on the implementation, please see amazon_plugin.py
'''
MAX_SEARCH_ATTEMPTS = 5
SLEEP_BETWEEN_ATTEMPTS = 3
def open(self, parent=None, detail_item=None, external=False):
store_link = self.store_link % self.aff_id
@ -41,73 +38,71 @@ class AmazonEUBase(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
loops = 0
while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
br = browser()
if loops > 0:
print ("Retry getbooks search", self.__class__.__name__, counter,
max_results, loops)
time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
loops += 1
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath))
price = ''.join(data.xpath(price_xpath))
counter -= 1
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s
yield s
def get_details(self, search_result, timeout):
pass

View File

@ -7,7 +7,7 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re, time
import re
from contextlib import closing
from lxml import html
@ -28,9 +28,6 @@ class AmazonEUBase(StorePlugin):
For comments on the implementation, please see amazon_plugin.py
'''
MAX_SEARCH_ATTEMPTS = 5
SLEEP_BETWEEN_ATTEMPTS = 3
def open(self, parent=None, detail_item=None, external=False):
store_link = self.store_link % self.aff_id
@ -41,73 +38,71 @@ class AmazonEUBase(StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
loops = 0
while counter == max_results and loops < self.MAX_SEARCH_ATTEMPTS:
br = browser()
if loops > 0:
print ("Retry getbooks search", self.__class__.__name__, counter,
max_results, loops)
time.sleep(self.SLEEP_BETWEEN_ATTEMPTS)
loops += 1
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
data_xpath = '//div[contains(@class, "prod")]'
# Results can be in a grid (table) or a column
format_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
# Results can be in a grid (table) or a column
price_xpath = (
'.//ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
'//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (authors pages). Se we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = data.xpath(asin_xpath)
if asin:
asin = asin[0]
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath))
title = ''.join(data.xpath(title_xpath))
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
authors = ''.join(data.xpath(author_xpath))
authors = re.sub('^' + self.author_article, '', authors)
authors = re.sub(self.and_word, ' & ', authors)
mo = re.match(r'(.*)(\(\d.*)$', authors)
if mo:
authors = mo.group(1).strip()
price = ''.join(data.xpath(price_xpath))
price = ''.join(data.xpath(price_xpath))
counter -= 1
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = authors.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s
yield s
def get_details(self, search_result, timeout):
pass