Update Woblink get books plugin to deal with website changes

Merge branch 'master' of https://github.com/t3d/calibre
This commit is contained in:
Kovid Goyal 2014-05-21 06:52:06 +05:30
commit 35d3531bfc

View File

@ -1,28 +1,40 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 7 # Needed for dynamic plugin loading store_version = 8 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011-2014, Tomasz Długosz <tomek3d@gmail.com>' __copyright__ = '2011-2014, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
import urllib import urllib
from base64 import b64encode from base64 import b64encode
from contextlib import closing
from lxml import html from lxml import html
from PyQt4.Qt import QUrl from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner from calibre import url_slash_cleaner
from calibre.gui2 import open_url from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog from calibre.gui2.store.web_store_dialog import WebStoreDialog
from calibre.ebooks.chardet import strip_encoding_declarations
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
js_browser = '''
from calibre.web.jsbrowser.browser import Browser, Timeout
import urllib
def get_results(url, timeout):
browser = Browser(default_timeout=timeout)
browser.visit(url)
browser.wait_for_element('#nw_content_list')
return browser.html
'''
class WoblinkStore(BasicStoreConfig, StorePlugin): class WoblinkStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
@ -51,42 +63,42 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
else: else:
url += '&limit=20' url += '&limit=20'
br = browser()
counter = max_results counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka "]'):
if counter <= 0:
break
id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href')) try:
if not id: results = fork_job(js_browser,'get_results', (url, timeout,), module_is_source_code=True)
continue except WorkerError as e:
raise Exception('Could not get results: %s'%e.orig_tb)
doc = html.fromstring(strip_encoding_declarations(results['result']))
for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka"]'):
if counter <= 0:
break
cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src')) id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href'))
title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()')) if not id:
author = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()')) continue
price = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_opcjezakupu_cena"]/span/text()'))
price = re.sub('\.', ',', price)
formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_formaty"]/span/text()'))
s = SearchResult() cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src'))
s.cover_url = 'http://woblink.com' + cover_url title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
s.title = title.strip() author = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
s.author = author.strip() price = ''.join(data.xpath('.//div[@class="nw_opcjezakupu_cena"]/text()'))
s.price = price + '' formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_format"]/span/text()'))
s.detail_item = id.strip()
s.formats = formats
if 'EPUB DRM' in formats: s = SearchResult()
s.drm = SearchResult.DRM_LOCKED s.cover_url = 'http://woblink.com' + cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price + ''
s.detail_item = id.strip()
s.formats = formats
counter -= 1 if 'DRM' in formats:
yield s s.drm = SearchResult.DRM_LOCKED
else:
s.drm = SearchResult.DRM_UNLOCKED
counter -= 1 counter -= 1
yield s yield s
else:
s.drm = SearchResult.DRM_UNLOCKED
counter -= 1
yield s