From 8632be2b8c572e6f139b2eae7b46989b5f0b4da6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Thu, 8 May 2014 21:50:42 +0200 Subject: [PATCH] Update Woblink - rewrite plugin to use js enabled browser --- .../gui2/store/stores/woblink_plugin.py | 82 +++++++++++-------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/src/calibre/gui2/store/stores/woblink_plugin.py b/src/calibre/gui2/store/stores/woblink_plugin.py index 59d7987a62..66ddbc149f 100644 --- a/src/calibre/gui2/store/stores/woblink_plugin.py +++ b/src/calibre/gui2/store/stores/woblink_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 7 # Needed for dynamic plugin loading +store_version = 8 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011-2014, Tomasz Długosz ' @@ -16,13 +16,27 @@ from lxml import html from PyQt4.Qt import QUrl -from calibre import browser, url_slash_cleaner +from calibre import url_slash_cleaner from calibre.gui2 import open_url from calibre.gui2.store import StorePlugin from calibre.gui2.store.basic_config import BasicStoreConfig from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.web_store_dialog import WebStoreDialog +from calibre.ebooks.chardet import xml_to_unicode +from calibre.utils.ipc.simple_worker import fork_job, WorkerError + +js_browser = ''' +from calibre.web.jsbrowser.browser import Browser, Timeout +import urllib + +def get_results(url, timeout): + browser = Browser(default_timeout=timeout) + browser.visit(url) + browser.wait_for_element('#nw_content_list') + return browser.html + ''' + class WoblinkStore(BasicStoreConfig, StorePlugin): def open(self, parent=None, detail_item=None, external=False): @@ -43,6 +57,7 @@ class WoblinkStore(BasicStoreConfig, StorePlugin): d.set_tags(self.config.get('tags', '')) d.exec_() + def search(self, query, max_results=10, timeout=60): url = 'http://woblink.com/katalog-ebooki?query=' + urllib.quote_plus(query.encode('utf-8')) if max_results > 10: @@ -51,42 +66,43 @@ class WoblinkStore(BasicStoreConfig, StorePlugin): else: url += '&limit=20' - br = browser() - counter = max_results - with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka "]'): - if counter <= 0: - break - id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href')) - if not id: - continue + try: + results = fork_job(js_browser,'get_results', (url, timeout,), module_is_source_code=True) + except WorkerError as e: + raise Exception('Could not get results: %s'%e.orig_tb) + doc = html.fromstring(xml_to_unicode(str(results['result']),strip_encoding_pats=True)[0]) + for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka"]'): + if counter <= 0: + break - cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src')) - title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()')) - author = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()')) - price = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_opcjezakupu_cena"]/span/text()')) - price = re.sub('\.', ',', price) - formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_formaty"]/span/text()')) + id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href')) + if not id: + continue - s = SearchResult() - s.cover_url = 'http://woblink.com' + cover_url - s.title = title.strip() - s.author = author.strip() - s.price = price + ' zł' - s.detail_item = id.strip() - s.formats = formats + cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src')) + title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()')) + author = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()')) + price = ''.join(data.xpath('.//div[@class="nw_opcjezakupu_cena"]/text()')) + formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_format"]/span/text()')) - if 'EPUB DRM' in formats: - s.drm = SearchResult.DRM_LOCKED + s = SearchResult() + s.cover_url = 'http://woblink.com' + cover_url + s.title = title.strip() + s.author = author.strip() + s.price = price + ' zł' + s.detail_item = id.strip() + s.formats = formats - counter -= 1 - yield s - else: - s.drm = SearchResult.DRM_UNLOCKED + if 'DRM' in formats: + s.drm = SearchResult.DRM_LOCKED - counter -= 1 - yield s + counter -= 1 + yield s + else: + s.drm = SearchResult.DRM_UNLOCKED + + counter -= 1 + yield s