mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
fix empik plugin
This commit is contained in:
parent
d99064cb58
commit
39dc0af554
@ -1,10 +1,10 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 7 # Needed for dynamic plugin loading
|
store_version = 8 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011-2015, Tomasz Długosz <tomek3d@gmail.com>'
|
__copyright__ = '2011-2017, Tomasz Długosz <tomek3d@gmail.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -46,48 +46,40 @@ class EmpikStore(BasicStoreConfig, StorePlugin):
|
|||||||
d.exec_()
|
d.exec_()
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = 'http://www.empik.com/szukaj/produkt?c=ebooki-ebooki&q=' + \
|
url = 'http://www.empik.com/ebooki/ebooki,3501,s?resultsPP=' + str(max_results) + '&q=' + urllib.quote(query)
|
||||||
urllib.quote(query) + '&qtype=basicForm&start=1&catalogType=pl&searchCategory=3501&format=epub&format=mobi&format=pdf&resultsPP=' + str(max_results)
|
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())
|
doc = html.fromstring(f.read())
|
||||||
for data in doc.xpath('//div[@class="productsSet"]/div'):
|
for data in doc.xpath('//div[@class="search-list-item"]'):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
id = ''.join(data.xpath('.//a[@class="productBox-450Title"]/@href'))
|
id = ''.join(data.xpath('.//div[@class="name"]/a/@href'))
|
||||||
if not id:
|
if not id:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
cover_url = ''.join(data.xpath('.//div[@class="productBox-450Pic"]/a/img/@data-original'))
|
cover_url = ''.join(data.xpath('.//a/img[@class="lazy"]/@lazy-img'))
|
||||||
title = ''.join(data.xpath('.//a[@class="productBox-450Title"]/text()'))
|
author = ', '.join(data.xpath('.//div[@class="smartAuthorWrapper"]/a/text()'))
|
||||||
title = re.sub(r' \(ebook\)', '', title)
|
title = ''.join(data.xpath('.//div[@class="name"]/a/@title'))
|
||||||
author = ', '.join(data.xpath('.//div[@class="productBox-450Author"]/a/text()'))
|
price = ''.join(data.xpath('.//div[@class="price"]/text()'))
|
||||||
price = ''.join(data.xpath('.//span[@class="currentPrice"]/text()'))
|
|
||||||
formats = ''.join(data.xpath('.//div[@class="productBox-450Type"]/text()'))
|
|
||||||
formats = re.sub(r'Ebook *,? *','', formats)
|
|
||||||
formats = re.sub(r'\(.*\)','', formats)
|
|
||||||
with closing(br.open('http://empik.com' + id.strip(), timeout=timeout/4)) as nf:
|
with closing(br.open('http://empik.com' + id.strip(), timeout=timeout/4)) as nf:
|
||||||
idata = html.fromstring(nf.read())
|
idata = html.fromstring(nf.read())
|
||||||
crawled = idata.xpath('.//td[(@class="connectedInfo") or (@class="connectedInfo connectedBordered")]/a/text()')
|
crawled = idata.xpath('.//a[(@class="chosen hrefstyle") or (@class="connectionsLink hrefstyle")]/text()')
|
||||||
formats_more = ','.join([re.sub('ebook, ','', x) for x in crawled if 'ebook' in x])
|
formats = ','.join([re.sub('ebook, ','', x.strip()) for x in crawled if 'ebook' in x])
|
||||||
if formats_more:
|
|
||||||
formats += ', ' + formats_more
|
|
||||||
drm = data.xpath('boolean(.//div[@class="productBox-450Type" and contains(text(), "ADE")])')
|
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
s = SearchResult()
|
s = SearchResult()
|
||||||
s.cover_url = cover_url
|
s.cover_url = cover_url
|
||||||
s.title = title.strip()
|
s.title = title.split(' - ')[0]
|
||||||
s.author = author.strip()
|
s.author = author.strip()
|
||||||
s.price = price
|
s.price = price.strip()
|
||||||
s.detail_item = 'http://empik.com' + id.strip()
|
s.detail_item = 'http://empik.com' + id.strip()
|
||||||
s.formats = formats.upper().strip()
|
s.formats = formats.upper().strip()
|
||||||
s.drm = SearchResult.DRM_LOCKED if drm else SearchResult.DRM_UNLOCKED
|
|
||||||
|
|
||||||
yield s
|
yield s
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user