mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
fix empik plugin
This commit is contained in:
parent
d99064cb58
commit
39dc0af554
@ -1,10 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
store_version = 7 # Needed for dynamic plugin loading
|
||||
store_version = 8 # Needed for dynamic plugin loading
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011-2015, Tomasz Długosz <tomek3d@gmail.com>'
|
||||
__copyright__ = '2011-2017, Tomasz Długosz <tomek3d@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
@ -46,48 +46,40 @@ class EmpikStore(BasicStoreConfig, StorePlugin):
|
||||
d.exec_()
|
||||
|
||||
def search(self, query, max_results=10, timeout=60):
|
||||
url = 'http://www.empik.com/szukaj/produkt?c=ebooki-ebooki&q=' + \
|
||||
urllib.quote(query) + '&qtype=basicForm&start=1&catalogType=pl&searchCategory=3501&format=epub&format=mobi&format=pdf&resultsPP=' + str(max_results)
|
||||
url = 'http://www.empik.com/ebooki/ebooki,3501,s?resultsPP=' + str(max_results) + '&q=' + urllib.quote(query)
|
||||
|
||||
br = browser()
|
||||
|
||||
counter = max_results
|
||||
with closing(br.open(url, timeout=timeout)) as f:
|
||||
doc = html.fromstring(f.read())
|
||||
for data in doc.xpath('//div[@class="productsSet"]/div'):
|
||||
for data in doc.xpath('//div[@class="search-list-item"]'):
|
||||
if counter <= 0:
|
||||
break
|
||||
|
||||
id = ''.join(data.xpath('.//a[@class="productBox-450Title"]/@href'))
|
||||
id = ''.join(data.xpath('.//div[@class="name"]/a/@href'))
|
||||
if not id:
|
||||
continue
|
||||
|
||||
cover_url = ''.join(data.xpath('.//div[@class="productBox-450Pic"]/a/img/@data-original'))
|
||||
title = ''.join(data.xpath('.//a[@class="productBox-450Title"]/text()'))
|
||||
title = re.sub(r' \(ebook\)', '', title)
|
||||
author = ', '.join(data.xpath('.//div[@class="productBox-450Author"]/a/text()'))
|
||||
price = ''.join(data.xpath('.//span[@class="currentPrice"]/text()'))
|
||||
formats = ''.join(data.xpath('.//div[@class="productBox-450Type"]/text()'))
|
||||
formats = re.sub(r'Ebook *,? *','', formats)
|
||||
formats = re.sub(r'\(.*\)','', formats)
|
||||
cover_url = ''.join(data.xpath('.//a/img[@class="lazy"]/@lazy-img'))
|
||||
author = ', '.join(data.xpath('.//div[@class="smartAuthorWrapper"]/a/text()'))
|
||||
title = ''.join(data.xpath('.//div[@class="name"]/a/@title'))
|
||||
price = ''.join(data.xpath('.//div[@class="price"]/text()'))
|
||||
|
||||
with closing(br.open('http://empik.com' + id.strip(), timeout=timeout/4)) as nf:
|
||||
idata = html.fromstring(nf.read())
|
||||
crawled = idata.xpath('.//td[(@class="connectedInfo") or (@class="connectedInfo connectedBordered")]/a/text()')
|
||||
formats_more = ','.join([re.sub('ebook, ','', x) for x in crawled if 'ebook' in x])
|
||||
if formats_more:
|
||||
formats += ', ' + formats_more
|
||||
drm = data.xpath('boolean(.//div[@class="productBox-450Type" and contains(text(), "ADE")])')
|
||||
crawled = idata.xpath('.//a[(@class="chosen hrefstyle") or (@class="connectionsLink hrefstyle")]/text()')
|
||||
formats = ','.join([re.sub('ebook, ','', x.strip()) for x in crawled if 'ebook' in x])
|
||||
|
||||
counter -= 1
|
||||
|
||||
s = SearchResult()
|
||||
s.cover_url = cover_url
|
||||
s.title = title.strip()
|
||||
s.title = title.split(' - ')[0]
|
||||
s.author = author.strip()
|
||||
s.price = price
|
||||
s.price = price.strip()
|
||||
s.detail_item = 'http://empik.com' + id.strip()
|
||||
s.formats = formats.upper().strip()
|
||||
s.drm = SearchResult.DRM_LOCKED if drm else SearchResult.DRM_UNLOCKED
|
||||
|
||||
yield s
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user