From 39dc0af55478210d3d24fdbc6332ca597d03d1fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 10 Dec 2017 15:56:49 +0100 Subject: [PATCH] fix empik plugin --- src/calibre/gui2/store/stores/empik_plugin.py | 36 ++++++++----------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/src/calibre/gui2/store/stores/empik_plugin.py b/src/calibre/gui2/store/stores/empik_plugin.py index e11bc0ba92..ef9192899e 100644 --- a/src/calibre/gui2/store/stores/empik_plugin.py +++ b/src/calibre/gui2/store/stores/empik_plugin.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 7 # Needed for dynamic plugin loading +store_version = 8 # Needed for dynamic plugin loading __license__ = 'GPL 3' -__copyright__ = '2011-2015, Tomasz Długosz ' +__copyright__ = '2011-2017, Tomasz Długosz ' __docformat__ = 'restructuredtext en' import re @@ -46,48 +46,40 @@ class EmpikStore(BasicStoreConfig, StorePlugin): d.exec_() def search(self, query, max_results=10, timeout=60): - url = 'http://www.empik.com/szukaj/produkt?c=ebooki-ebooki&q=' + \ - urllib.quote(query) + '&qtype=basicForm&start=1&catalogType=pl&searchCategory=3501&format=epub&format=mobi&format=pdf&resultsPP=' + str(max_results) + url = 'http://www.empik.com/ebooki/ebooki,3501,s?resultsPP=' + str(max_results) + '&q=' + urllib.quote(query) br = browser() counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@class="productsSet"]/div'): + for data in doc.xpath('//div[@class="search-list-item"]'): if counter <= 0: break - id = ''.join(data.xpath('.//a[@class="productBox-450Title"]/@href')) + id = ''.join(data.xpath('.//div[@class="name"]/a/@href')) if not id: continue - cover_url = ''.join(data.xpath('.//div[@class="productBox-450Pic"]/a/img/@data-original')) - title = ''.join(data.xpath('.//a[@class="productBox-450Title"]/text()')) - title = re.sub(r' \(ebook\)', '', title) - author = ', '.join(data.xpath('.//div[@class="productBox-450Author"]/a/text()')) - price = ''.join(data.xpath('.//span[@class="currentPrice"]/text()')) - formats = ''.join(data.xpath('.//div[@class="productBox-450Type"]/text()')) - formats = re.sub(r'Ebook *,? *','', formats) - formats = re.sub(r'\(.*\)','', formats) + cover_url = ''.join(data.xpath('.//a/img[@class="lazy"]/@lazy-img')) + author = ', '.join(data.xpath('.//div[@class="smartAuthorWrapper"]/a/text()')) + title = ''.join(data.xpath('.//div[@class="name"]/a/@title')) + price = ''.join(data.xpath('.//div[@class="price"]/text()')) + with closing(br.open('http://empik.com' + id.strip(), timeout=timeout/4)) as nf: idata = html.fromstring(nf.read()) - crawled = idata.xpath('.//td[(@class="connectedInfo") or (@class="connectedInfo connectedBordered")]/a/text()') - formats_more = ','.join([re.sub('ebook, ','', x) for x in crawled if 'ebook' in x]) - if formats_more: - formats += ', ' + formats_more - drm = data.xpath('boolean(.//div[@class="productBox-450Type" and contains(text(), "ADE")])') + crawled = idata.xpath('.//a[(@class="chosen hrefstyle") or (@class="connectionsLink hrefstyle")]/text()') + formats = ','.join([re.sub('ebook, ','', x.strip()) for x in crawled if 'ebook' in x]) counter -= 1 s = SearchResult() s.cover_url = cover_url - s.title = title.strip() + s.title = title.split('  - ')[0] s.author = author.strip() - s.price = price + s.price = price.strip() s.detail_item = 'http://empik.com' + id.strip() s.formats = formats.upper().strip() - s.drm = SearchResult.DRM_LOCKED if drm else SearchResult.DRM_UNLOCKED yield s