Refactor Woblink store plugin to not use Qt WebKit

This commit is contained in:
Kovid Goyal 2016-04-27 08:09:45 +05:30
parent 60a531283c
commit c82c1235e1

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 10 # Needed for dynamic plugin loading
store_version = 11 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011-2014, Tomasz Długosz <tomek3d@gmail.com>'
@ -11,65 +11,40 @@ import urllib
from base64 import b64encode
from lxml import html
from mechanize import Request
from PyQt5.Qt import QUrl
from calibre import url_slash_cleaner
from calibre import url_slash_cleaner, browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
from calibre.ebooks.chardet import strip_encoding_declarations
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
js_browser = '''
from calibre.web.jsbrowser.browser import Browser, Timeout
import urllib
def get_results(url, timeout):
browser = Browser(default_timeout=timeout, user_agent='CalibreCrawler/1.0')
browser.visit(url)
browser.wait_for_element('#nw_kontent_main')
return browser.html
'''
class WoblinkStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/16/58/'
url = 'http://woblink.com/publication'
aff_url = aff_root + str(b64encode(url))
detail_url = None
if detail_item:
detail_url = aff_root + str(b64encode('http://woblink.com' + detail_item))
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url)))
else:
d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://woblink.com/ebooki-kategorie?query=' + urllib.quote_plus(query.encode('utf-8'))
def search(query, max_results=10, timeout=60):
url = 'http://woblink.com/publication/ajax?mode=none&query=' + urllib.quote_plus(query.encode('utf-8'))
if max_results > 10:
if max_results > 20:
url += '&limit=30'
else:
url += '&limit=20'
br = browser(user_agent='CalibreCrawler/1.0')
br.set_handle_gzip(True)
rq = Request(url, headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With': 'XMLHttpRequest',
'Referrer':'http://woblink.com/ebooki-kategorie',
'Cache-Control':'max-age=0',
}, data=urllib.urlencode({
'nw_filtry_filtr_zakrescen_formularz[min]':'0',
'nw_filtry_filtr_zakrescen_formularz[max]':'350',
}))
r = br.open(rq)
raw = r.read()
doc = html.fromstring('<html><body>' + raw.decode('utf-8') + '</body></html>')
counter = max_results
try:
results = fork_job(js_browser,'get_results', (url, timeout,), module_is_source_code=True)
except WorkerError as e:
raise Exception('Could not get results: %s'%e.orig_tb)
doc = html.fromstring(strip_encoding_declarations(results['result']))
for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka "]'):
if counter <= 0:
break
@ -92,13 +67,35 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
s.detail_item = id.strip()
s.formats = formats
if 'DRM' in formats:
s.drm = SearchResult.DRM_LOCKED
counter -= 1
s.drm = SearchResult.DRM_LOCKED if 'DRM' in formats else SearchResult.DRM_UNLOCKED
yield s
class WoblinkStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/16/58/'
url = 'http://woblink.com/publication'
aff_url = aff_root + str(b64encode(url))
detail_url = None
if detail_item:
detail_url = aff_root + str(b64encode('http://woblink.com' + detail_item))
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url)))
else:
s.drm = SearchResult.DRM_UNLOCKED
d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
counter -= 1
def search(self, query, max_results=10, timeout=60):
for s in search(query, max_results, timeout):
yield s
if __name__ == '__main__':
from pprint import pprint
pprint(list(search('Franciszek')))