Fix amazon stores to account for website change.

This commit is contained in:
Charles Haley 2013-01-12 12:35:26 +01:00
parent c414e38d37
commit 9c41f1173a
2 changed files with 14 additions and 30 deletions

View File

@ -6,8 +6,6 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import random
import re
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
@ -130,16 +128,16 @@ class AmazonKindleStore(StorePlugin):
data_xpath = '//div[contains(@class, "prod")]' data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()' format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = './/div[@class="image"]/a[1]' asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src' cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()' title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
for data in doc.xpath(data_xpath): for data in doc.xpath(data_xpath):
if counter <= 0: if counter <= 0:
break break
# Even though we are searching digital-text only Amazon will still # Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need # put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it # to explicitly check if the item is a Kindle book and ignore it
@ -147,21 +145,15 @@ class AmazonKindleStore(StorePlugin):
format = ''.join(data.xpath(format_xpath)) format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower(): if 'kindle' not in format.lower():
continue continue
# We must have an asin otherwise we can't easily reference the # We must have an asin otherwise we can't easily reference the
# book later. # book later.
asin_href = None asin = data.xpath(asin_xpath)
asin_a = data.xpath(asin_xpath) if asin:
if asin_a: asin = asin[0]
asin_href = asin_a[0].get('href', '')
m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
if m:
asin = m.group('asin')
else:
continue
else: else:
continue continue
cover_url = ''.join(data.xpath(cover_xpath)) cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath(title_xpath)) title = ''.join(data.xpath(title_xpath))
@ -172,9 +164,9 @@ class AmazonKindleStore(StorePlugin):
pass pass
price = ''.join(data.xpath(price_xpath)) price = ''.join(data.xpath(price_xpath))
counter -= 1 counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url.strip() s.cover_url = cover_url.strip()
s.title = title.strip() s.title = title.strip()

View File

@ -6,8 +6,6 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
@ -53,7 +51,7 @@ class AmazonUKKindleStore(StorePlugin):
data_xpath = '//div[contains(@class, "prod")]' data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()' format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = './/div[@class="image"]/a[1]' asin_xpath = '@name'
cover_xpath = './/img[@class="productImage"]/@src' cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()' title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
@ -73,15 +71,9 @@ class AmazonUKKindleStore(StorePlugin):
# We must have an asin otherwise we can't easily reference the # We must have an asin otherwise we can't easily reference the
# book later. # book later.
asin_href = None asin = data.xpath(asin_xpath)
asin_a = data.xpath(asin_xpath) if asin:
if asin_a: asin = asin[0]
asin_href = asin_a[0].get('href', '')
m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
if m:
asin = m.group('asin')
else:
continue
else: else:
continue continue