improved HTML parsing

This commit is contained in:
Tomasz Długosz 2011-08-27 22:44:44 +02:00
parent 3d431f2ecf
commit 6a3ad1d22f

View File

@@ -56,11 +56,11 @@ class RW2010Store(BasicStoreConfig, StorePlugin):
with closing(br.open(id.strip(), timeout=timeout/4)) as nf: with closing(br.open(id.strip(), timeout=timeout/4)) as nf:
idata = html.fromstring(nf.read()) idata = html.fromstring(nf.read())
cover_url = ''.join(idata.xpath('//div[@class="boxa"]/div[1]/img/@src')) cover_url = ''.join(idata.xpath('//div[@class="boxa"]//div[@class="img"]/img/@src'))
author = ''.join(idata.xpath('//div[@class="boxb"]/h3[3]/span/text()')) author = ''.join(idata.xpath('//div[@class="boxb"]//h3[text()="Autor: "]/span/text()'))
title = ''.join(idata.xpath('//div[@class="boxb"]/h2[1]/text()')) title = ''.join(idata.xpath('//div[@class="boxb"]/h2[1]/text()'))
title = re.sub(r'\(#.+\)', '', title) title = re.sub(r'\(#.+\)', '', title)
formats = ''.join(idata.xpath('//div[@class="boxb"]/h3[6]/span/text()')) formats = ''.join(idata.xpath('//div[@class="boxb"]//h3[text()="Format pliku: "]/span/text()'))
price = ''.join(idata.xpath('//div[@class="price-box"]/span/text()')) + ',00 zł' price = ''.join(idata.xpath('//div[@class="price-box"]/span/text()')) + ',00 zł'
counter -= 1 counter -= 1