Fix Empik store plugin: update the search URL and result-page selectors for the current empik.com markup

This commit is contained in:
Tomasz Długosz 2017-12-10 15:56:49 +01:00
parent d99064cb58
commit 39dc0af554

View File

@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
# Plugin metadata. Each name is assigned exactly once; earlier duplicate
# assignments of store_version and __copyright__ were overridden by these
# values and had no effect.
store_version = 8 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011-2017, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
@ -46,48 +46,40 @@ class EmpikStore(BasicStoreConfig, StorePlugin):
d.exec_()
def search(self, query, max_results=10, timeout=60):
    """Search the empik.com e-book listing and yield one result per offer.

    The listing page supplies the cover, author, title and price; the
    available formats are read from each offer's own product page, so every
    yielded result costs one additional HTTP request.

    :param query: search phrase; it is passed through ``urllib.quote``
        before being placed in the URL.
    :param max_results: maximum number of results to yield; also sent to
        the site as the ``resultsPP`` parameter.
    :param timeout: timeout for the listing request; each product-page
        request is given a quarter of this value.
    :return: generator of ``SearchResult`` objects.
    """
    url = ('http://www.empik.com/ebooki/ebooki,3501,s?resultsPP=' +
           str(max_results) + '&q=' + urllib.quote(query))

    br = browser()

    counter = max_results
    with closing(br.open(url, timeout=timeout)) as f:
        doc = html.fromstring(f.read())
        for data in doc.xpath('//div[@class="search-list-item"]'):
            if counter <= 0:
                break

            # Relative link to the product page; entries without one are
            # skipped and do not count against max_results.
            href = ''.join(data.xpath('.//div[@class="name"]/a/@href'))
            if not href:
                continue
            detail_url = 'http://empik.com' + href.strip()

            cover_url = ''.join(data.xpath('.//a/img[@class="lazy"]/@lazy-img'))
            author = ', '.join(data.xpath('.//div[@class="smartAuthorWrapper"]/a/text()'))
            title = ''.join(data.xpath('.//div[@class="name"]/a/@title'))
            price = ''.join(data.xpath('.//div[@class="price"]/text()'))

            # Formats are only listed on the product page. Keep the link
            # texts that mention 'ebook' and drop the 'ebook, ' prefix.
            with closing(br.open(detail_url, timeout=timeout/4)) as nf:
                idata = html.fromstring(nf.read())
                crawled = idata.xpath('.//a[(@class="chosen hrefstyle") or (@class="connectionsLink hrefstyle")]/text()')
                formats = ','.join(
                    x.strip().replace('ebook, ', '')
                    for x in crawled if 'ebook' in x)

            counter -= 1

            s = SearchResult()
            s.cover_url = cover_url
            # Keep only the part of the link title before the '  - '
            # separator.
            s.title = title.split('  - ')[0].strip()
            s.author = author.strip()
            s.price = price.strip()
            s.detail_item = detail_url
            s.formats = formats.upper().strip()
            # NOTE(review): none of the selectors used above expose DRM
            # information, so the status is reported as unknown rather than
            # guessed. Assumes SearchResult defines DRM_UNKNOWN next to
            # DRM_LOCKED/DRM_UNLOCKED -- confirm against calibre.
            s.drm = SearchResult.DRM_UNKNOWN

            yield s