Get Books: Update Empik and CDP store plugins

Merge branch 'master' of https://github.com/t3d/calibre
This commit is contained in:
Kovid Goyal 2015-02-21 09:34:46 +05:30
commit 51beffc7e9
2 changed files with 28 additions and 25 deletions

View File

@ -1,10 +1,10 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 4 # Needed for dynamic plugin loading store_version = 5 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2013-2014, Tomasz Długosz <tomek3d@gmail.com>' __copyright__ = '2013-2015, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import urllib import urllib
@ -50,43 +50,40 @@ class CdpStore(BasicStoreConfig, StorePlugin):
counter = max_results counter = max_results
while counter: while counter:
with closing(br.open(u'https://cdp.pl/products/search?utf8=✓&keywords=' + urllib.quote_plus(query) + '&page=' + str(page), timeout=timeout)) as f: with closing(br.open(u'https://cdp.pl/ksiazki/e-book.html?q=' + urllib.quote_plus(query) + '&p=' + str(page), timeout=timeout)) as f:
doc = html.fromstring(f.read()) doc = html.fromstring(f.read())
for data in doc.xpath('//ul[@id="products"]/li'): for data in doc.xpath('//ul[@class="grid-of-products"]/li'):
if counter <= 0: if counter <= 0:
break break
id = ''.join(data.xpath('.//div[@class="product-image"]/a[1]/@href')) id = ''.join(data.xpath('.//a[@class="product-image"]/@href'))
if not id: if not id:
continue continue
if 'ksiazki' not in id: if 'ksiazki' not in id:
continue continue
cover_url = ''.join(data.xpath('.//div[@class="product-image"]/a[1]/@data-background')) cover_url = ''.join(data.xpath('.//a[@class="product-image"]/img/@data-src'))
cover_url = cover_url.split('\'')[1] title = ''.join(data.xpath('.//h3[1]/a/@title'))
title = ''.join(data.xpath('.//div[@class="product-description"]/h2/a/text()')) price = ''.join(data.xpath('.//span[@class="custom_price"]/text()'))+','+''.join(data.xpath('.//span[@class="custom_price"]/sup/text()'))
author = ''.join(data.xpath('.//div[@class="product-description"]//ul[@class="taxons"]/li[@class="author"]/a/text()')) author = ''
price = ''.join(data.xpath('.//span[@itemprop="price"]/text()')) formats = ''
with closing(br.open( id.strip(), timeout=timeout/4)) as nf:
idata = html.fromstring(nf.read())
author = ', '.join(idata.xpath('.//ul[@class="film-data"]/li[1]/p/text()'))
formats = idata.xpath('//div[@class="product-attributes-container"][2]/ul/li/span/text()')[-1]
counter -= 1 counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url s.cover_url = cover_url
s.title = title.strip() s.title = title.strip()
s.author = author.strip() s.author = author
s.price = price s.price = price + ''
s.detail_item = id.strip() s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED s.drm = SearchResult.DRM_UNLOCKED
s.formats = formats.upper().strip()
yield s yield s
if not doc.xpath('//span[@class="next"]/a'): if not doc.xpath('//span[@class="next-page"]/a'):
break break
page+=1 page+=1
def get_details(self, search_result, timeout):
    """Fill in ``search_result.formats`` from the item's detail page.

    Opens ``search_result.detail_item`` with a fresh browser, parses the
    response as HTML and collects the text of every ``<li>`` found under
    ``div#product-bonus``. The texts are joined with ``', '``, upper-cased
    and stored on ``search_result.formats``.

    :param search_result: result object; its ``detail_item`` is read as the
        URL to open and its ``formats`` attribute is overwritten.
    :param timeout: passed through unchanged to the browser's ``open`` call.
    :return: always ``True``.
        NOTE(review): presumably signals to the caller that the result was
        updated -- confirm against the store plugin API.
    """
    detail_browser = browser()
    response = detail_browser.open(search_result.detail_item, timeout=timeout)
    # closing() guarantees the HTTP response is released once it is parsed.
    with closing(response) as page:
        tree = html.fromstring(page.read())

    # The tree is fully in memory, so it can be queried after the response
    # has been closed.
    bonus_entries = tree.xpath('//div[@id="product-bonus"]/div/ul/li/text()')
    search_result.formats = ', '.join(bonus_entries).upper()
    return True

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 6 # Needed for dynamic plugin loading store_version = 7 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011-2015, Tomasz Długosz <tomek3d@gmail.com>' __copyright__ = '2011-2015, Tomasz Długosz <tomek3d@gmail.com>'
@ -45,7 +45,7 @@ class EmpikStore(BasicStoreConfig, StorePlugin):
d.exec_() d.exec_()
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = 'http://www.empik.com/szukaj/produkt?c=ebooki-ebooki&q=' + urllib.quote(query) + '&qtype=basicForm&start=1&catalogType=pl&searchCategory=3501&resultsPP=' + str(max_results) url = 'http://www.empik.com/szukaj/produkt?c=ebooki-ebooki&q=' + urllib.quote(query) + '&qtype=basicForm&start=1&catalogType=pl&searchCategory=3501&format=epub&format=mobi&format=pdf&resultsPP=' + str(max_results)
br = browser() br = browser()
@ -63,18 +63,24 @@ class EmpikStore(BasicStoreConfig, StorePlugin):
cover_url = ''.join(data.xpath('.//div[@class="productBox-450Pic"]/a/img/@data-original')) cover_url = ''.join(data.xpath('.//div[@class="productBox-450Pic"]/a/img/@data-original'))
title = ''.join(data.xpath('.//a[@class="productBox-450Title"]/text()')) title = ''.join(data.xpath('.//a[@class="productBox-450Title"]/text()'))
title = re.sub(r' \(ebook\)', '', title) title = re.sub(r' \(ebook\)', '', title)
author = ''.join(data.xpath('.//div[@class="productBox-450Author"]/a/text()')) author = ', '.join(data.xpath('.//div[@class="productBox-450Author"]/a/text()'))
price = ''.join(data.xpath('.//span[@class="currentPrice"]/text()')) price = ''.join(data.xpath('.//span[@class="currentPrice"]/text()'))
formats = ''.join(data.xpath('.//div[@class="productBox-450Type"]/text()')) formats = ''.join(data.xpath('.//div[@class="productBox-450Type"]/text()'))
formats = re.sub(r'Ebook *,? *','', formats) formats = re.sub(r'Ebook *,? *','', formats)
formats = re.sub(r'\(.*\)','', formats) formats = re.sub(r'\(.*\)','', formats)
with closing(br.open('http://empik.com' + id.strip(), timeout=timeout/4)) as nf:
idata = html.fromstring(nf.read())
crawled = idata.xpath('.//td[(@class="connectedInfo") or (@class="connectedInfo connectedBordered")]/a/text()')
formats_more = ','.join([ re.sub('ebook, ','', x) for x in crawled if 'ebook' in x])
if formats_more:
formats += ', ' + formats_more
drm = data.xpath('boolean(.//div[@class="productBox-450Type" and contains(text(), "ADE")])') drm = data.xpath('boolean(.//div[@class="productBox-450Type" and contains(text(), "ADE")])')
counter -= 1 counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url s.cover_url = cover_url
s.title = title.strip() + ' ' + formats s.title = title.strip()
s.author = author.strip() s.author = author.strip()
s.price = price s.price = price
s.detail_item = 'http://empik.com' + id.strip() s.detail_item = 'http://empik.com' + id.strip()