fix empik plugin

2025-11-21 22:13:04 -05:00 · 2017-12-10 15:56:49 +01:00 · 2017-12-10 15:56:49 +01:00 · 39dc0af554
commit 39dc0af554
parent d99064cb58
1 changed files with 14 additions and 22 deletions
--- a/src/calibre/gui2/store/stores/empik_plugin.py
+++ b/src/calibre/gui2/store/stores/empik_plugin.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 7  # Needed for dynamic plugin loading
+store_version = 8  # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
-__copyright__ = '2011-2015, Tomasz Długosz <tomek3d@gmail.com>'
+__copyright__ = '2011-2017, Tomasz Długosz <tomek3d@gmail.com>'
 __docformat__ = 'restructuredtext en'

 import re
@@ -46,48 +46,40 @@ class EmpikStore(BasicStoreConfig, StorePlugin):
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
-        url = 'http://www.empik.com/szukaj/produkt?c=ebooki-ebooki&q=' + \
-            urllib.quote(query) + '&qtype=basicForm&start=1&catalogType=pl&searchCategory=3501&format=epub&format=mobi&format=pdf&resultsPP=' + str(max_results)
+        url = 'http://www.empik.com/ebooki/ebooki,3501,s?resultsPP=' + str(max_results) + '&q=' + urllib.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@class="productsSet"]/div'):
+            for data in doc.xpath('//div[@class="search-list-item"]'):
                if counter <= 0:
                    break

-                id = ''.join(data.xpath('.//a[@class="productBox-450Title"]/@href'))
+                id = ''.join(data.xpath('.//div[@class="name"]/a/@href'))
                if not id:
                    continue

-                cover_url = ''.join(data.xpath('.//div[@class="productBox-450Pic"]/a/img/@data-original'))
-                title = ''.join(data.xpath('.//a[@class="productBox-450Title"]/text()'))
-                title = re.sub(r' \(ebook\)', '', title)
-                author = ', '.join(data.xpath('.//div[@class="productBox-450Author"]/a/text()'))
-                price = ''.join(data.xpath('.//span[@class="currentPrice"]/text()'))
-                formats = ''.join(data.xpath('.//div[@class="productBox-450Type"]/text()'))
-                formats = re.sub(r'Ebook *,? *','', formats)
-                formats = re.sub(r'\(.*\)','', formats)
+                cover_url = ''.join(data.xpath('.//a/img[@class="lazy"]/@lazy-img'))
+                author = ', '.join(data.xpath('.//div[@class="smartAuthorWrapper"]/a/text()'))
+                title = ''.join(data.xpath('.//div[@class="name"]/a/@title'))
+                price = ''.join(data.xpath('.//div[@class="price"]/text()'))
+
                with closing(br.open('http://empik.com' + id.strip(), timeout=timeout/4)) as nf:
                    idata = html.fromstring(nf.read())
-                    crawled = idata.xpath('.//td[(@class="connectedInfo") or (@class="connectedInfo connectedBordered")]/a/text()')
-                    formats_more = ','.join([re.sub('ebook, ','', x) for x in crawled if 'ebook' in x])
-                    if formats_more:
-                        formats += ', ' + formats_more
-                drm = data.xpath('boolean(.//div[@class="productBox-450Type" and contains(text(), "ADE")])')
+                    crawled = idata.xpath('.//a[(@class="chosen hrefstyle") or (@class="connectionsLink hrefstyle")]/text()')
+                    formats = ','.join([re.sub('ebook, ','', x.strip()) for x in crawled if 'ebook' in x])

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
-                s.title = title.strip()
+                s.title = title.split('  - ')[0]
                s.author = author.strip()
-                s.price = price
+                s.price = price.strip()
                s.detail_item = 'http://empik.com' + id.strip()
                s.formats = formats.upper().strip()
-                s.drm = SearchResult.DRM_LOCKED if drm else SearchResult.DRM_UNLOCKED

                yield s