From 9355da4576f052f44ff453ccd13f26ecca34f493 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 21 Feb 2015 01:15:07 +0100 Subject: [PATCH 1/3] Empik Plugin: fix more than one author for one book --- src/calibre/gui2/store/stores/empik_plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/store/stores/empik_plugin.py b/src/calibre/gui2/store/stores/empik_plugin.py index 4303298fde..0204c3c913 100644 --- a/src/calibre/gui2/store/stores/empik_plugin.py +++ b/src/calibre/gui2/store/stores/empik_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 6 # Needed for dynamic plugin loading +store_version = 7 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011-2015, Tomasz Długosz ' @@ -63,7 +63,7 @@ class EmpikStore(BasicStoreConfig, StorePlugin): cover_url = ''.join(data.xpath('.//div[@class="productBox-450Pic"]/a/img/@data-original')) title = ''.join(data.xpath('.//a[@class="productBox-450Title"]/text()')) title = re.sub(r' \(ebook\)', '', title) - author = ''.join(data.xpath('.//div[@class="productBox-450Author"]/a/text()')) + author = ', '.join(data.xpath('.//div[@class="productBox-450Author"]/a/text()')) price = ''.join(data.xpath('.//span[@class="currentPrice"]/text()')) formats = ''.join(data.xpath('.//div[@class="productBox-450Type"]/text()')) formats = re.sub(r'Ebook *,? 
*','', formats) From 7c0036a7c6c412692ad969b1a7ec7fb930e1578d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 21 Feb 2015 03:02:20 +0100 Subject: [PATCH 2/3] Empik Plugin: improve hidden formats detection --- src/calibre/gui2/store/stores/empik_plugin.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/store/stores/empik_plugin.py b/src/calibre/gui2/store/stores/empik_plugin.py index 0204c3c913..4eef1ab685 100644 --- a/src/calibre/gui2/store/stores/empik_plugin.py +++ b/src/calibre/gui2/store/stores/empik_plugin.py @@ -45,7 +45,7 @@ class EmpikStore(BasicStoreConfig, StorePlugin): d.exec_() def search(self, query, max_results=10, timeout=60): - url = 'http://www.empik.com/szukaj/produkt?c=ebooki-ebooki&q=' + urllib.quote(query) + '&qtype=basicForm&start=1&catalogType=pl&searchCategory=3501&resultsPP=' + str(max_results) + url = 'http://www.empik.com/szukaj/produkt?c=ebooki-ebooki&q=' + urllib.quote(query) + '&qtype=basicForm&start=1&catalogType=pl&searchCategory=3501&format=epub&format=mobi&format=pdf&resultsPP=' + str(max_results) br = browser() @@ -68,13 +68,19 @@ class EmpikStore(BasicStoreConfig, StorePlugin): formats = ''.join(data.xpath('.//div[@class="productBox-450Type"]/text()')) formats = re.sub(r'Ebook *,? 
*','', formats) formats = re.sub(r'\(.*\)','', formats) + with closing(br.open('http://empik.com' + id.strip(), timeout=timeout/4)) as nf: + idata = html.fromstring(nf.read()) + crawled = idata.xpath('.//td[(@class="connectedInfo") or (@class="connectedInfo connectedBordered")]/a/text()') + formats_more = ','.join([ re.sub('ebook, ','', x) for x in crawled if 'ebook' in x]) + if formats_more: + formats += ', ' + formats_more drm = data.xpath('boolean(.//div[@class="productBox-450Type" and contains(text(), "ADE")])') counter -= 1 s = SearchResult() s.cover_url = cover_url - s.title = title.strip() + ' ' + formats + s.title = title.strip() s.author = author.strip() s.price = price s.detail_item = 'http://empik.com' + id.strip() From 00077af7edc46d58a5d6aefa3fe8519ae34658b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Fri, 20 Feb 2015 22:41:52 +0100 Subject: [PATCH 3/3] Cdp Plugin: align to website changes --- src/calibre/gui2/store/stores/cdp_plugin.py | 39 ++++++++++----------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/calibre/gui2/store/stores/cdp_plugin.py b/src/calibre/gui2/store/stores/cdp_plugin.py index caf7337c3d..cfeec1e7f0 100644 --- a/src/calibre/gui2/store/stores/cdp_plugin.py +++ b/src/calibre/gui2/store/stores/cdp_plugin.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 4 # Needed for dynamic plugin loading +store_version = 5 # Needed for dynamic plugin loading __license__ = 'GPL 3' -__copyright__ = '2013-2014, Tomasz Długosz ' +__copyright__ = '2013-2015, Tomasz Długosz ' __docformat__ = 'restructuredtext en' import urllib @@ -50,43 +50,40 @@ class CdpStore(BasicStoreConfig, StorePlugin): counter = max_results while counter: - with closing(br.open(u'https://cdp.pl/products/search?utf8=✓&keywords=' + urllib.quote_plus(query) + '&page=' + str(page), timeout=timeout)) as f: + with 
closing(br.open(u'https://cdp.pl/ksiazki/e-book.html?q=' + urllib.quote_plus(query) + '&p=' + str(page), timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//ul[@id="products"]/li'): + for data in doc.xpath('//ul[@class="grid-of-products"]/li'): if counter <= 0: break - id = ''.join(data.xpath('.//div[@class="product-image"]/a[1]/@href')) + id = ''.join(data.xpath('.//a[@class="product-image"]/@href')) if not id: continue if 'ksiazki' not in id: continue - cover_url = ''.join(data.xpath('.//div[@class="product-image"]/a[1]/@data-background')) - cover_url = cover_url.split('\'')[1] - title = ''.join(data.xpath('.//div[@class="product-description"]/h2/a/text()')) - author = ''.join(data.xpath('.//div[@class="product-description"]//ul[@class="taxons"]/li[@class="author"]/a/text()')) - price = ''.join(data.xpath('.//span[@itemprop="price"]/text()')) + cover_url = ''.join(data.xpath('.//a[@class="product-image"]/img/@data-src')) + title = ''.join(data.xpath('.//h3[1]/a/@title')) + price = ''.join(data.xpath('.//span[@class="custom_price"]/text()'))+','+''.join(data.xpath('.//span[@class="custom_price"]/sup/text()')) + author = '' + formats = '' + with closing(br.open( id.strip(), timeout=timeout/4)) as nf: + idata = html.fromstring(nf.read()) + author = ', '.join(idata.xpath('.//ul[@class="film-data"]/li[1]/p/text()')) + formats = idata.xpath('//div[@class="product-attributes-container"][2]/ul/li/span/text()')[-1] counter -= 1 s = SearchResult() s.cover_url = cover_url s.title = title.strip() - s.author = author.strip() - s.price = price + s.author = author + s.price = price + ' zł' s.detail_item = id.strip() s.drm = SearchResult.DRM_UNLOCKED + s.formats = formats.upper().strip() yield s - if not doc.xpath('//span[@class="next"]/a'): + if not doc.xpath('//span[@class="next-page"]/a'): break page+=1 - - def get_details(self, search_result, timeout): - br = browser() - with closing(br.open(search_result.detail_item, timeout=timeout)) as nf: - 
idata = html.fromstring(nf.read()) - formats = ', '.join(idata.xpath('//div[@id="product-bonus"]/div/ul/li/text()')) - search_result.formats = formats.upper() - return True