Refactor Woblink store plugin to not use Qt WebKit

This commit is contained in:
Kovid Goyal 2016-04-27 08:09:45 +05:30
parent 60a531283c
commit c82c1235e1

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 10 # Needed for dynamic plugin loading store_version = 11 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011-2014, Tomasz Długosz <tomek3d@gmail.com>' __copyright__ = '2011-2014, Tomasz Długosz <tomek3d@gmail.com>'
@ -11,65 +11,40 @@ import urllib
from base64 import b64encode from base64 import b64encode
from lxml import html from lxml import html
from mechanize import Request
from PyQt5.Qt import QUrl from PyQt5.Qt import QUrl
from calibre import url_slash_cleaner from calibre import url_slash_cleaner, browser
from calibre.gui2 import open_url from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog from calibre.gui2.store.web_store_dialog import WebStoreDialog
from calibre.ebooks.chardet import strip_encoding_declarations def search(query, max_results=10, timeout=60):
from calibre.utils.ipc.simple_worker import fork_job, WorkerError url = 'http://woblink.com/publication/ajax?mode=none&query=' + urllib.quote_plus(query.encode('utf-8'))
# Source text of a small helper module, kept as a string. search() passes it
# to fork_job() with module_is_source_code=True and asks for
# get_results(url, timeout); the page HTML that get_results() returns
# (browser.html, after waiting for the '#nw_kontent_main' element) is what
# search() then parses for results.
js_browser = '''
from calibre.web.jsbrowser.browser import Browser, Timeout
import urllib
def get_results(url, timeout):
browser = Browser(default_timeout=timeout, user_agent='CalibreCrawler/1.0')
browser.visit(url)
browser.wait_for_element('#nw_kontent_main')
return browser.html
'''
class WoblinkStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
    '''
    Open the Woblink store, or a single book's page, via the tracking
    redirect rooted at ``aff_root``.

    :param parent: parent widget handed to the embedded store dialog.
    :param detail_item: site-relative path of a book page; when empty or
        None the store's publication listing is opened instead.
    :param external: when true (or when the ``open_external`` config value
        is set) the URL is opened with ``open_url`` instead of the
        embedded ``WebStoreDialog``.
    '''
    aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/16/58/'
    url = 'http://woblink.com/publication'

    # The redirect service takes the destination base64-encoded and appended
    # to its root; a specific book page wins over the generic listing.
    destination = ('http://woblink.com' + detail_item) if detail_item else url
    target = aff_root + str(b64encode(destination))

    use_external = external or self.config.get('open_external', False)
    if use_external:
        open_url(QUrl(url_slash_cleaner(target)))
        return

    dialog = WebStoreDialog(self.gui, url, parent, target)
    dialog.setWindowTitle(self.name)
    dialog.set_tags(self.config.get('tags', ''))
    dialog.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://woblink.com/ebooki-kategorie?query=' + urllib.quote_plus(query.encode('utf-8'))
if max_results > 10: if max_results > 10:
if max_results > 20: if max_results > 20:
url += '&limit=30' url += '&limit=30'
else: else:
url += '&limit=20' url += '&limit=20'
br = browser(user_agent='CalibreCrawler/1.0')
br.set_handle_gzip(True)
rq = Request(url, headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With': 'XMLHttpRequest',
'Referrer':'http://woblink.com/ebooki-kategorie',
'Cache-Control':'max-age=0',
}, data=urllib.urlencode({
'nw_filtry_filtr_zakrescen_formularz[min]':'0',
'nw_filtry_filtr_zakrescen_formularz[max]':'350',
}))
r = br.open(rq)
raw = r.read()
doc = html.fromstring('<html><body>' + raw.decode('utf-8') + '</body></html>')
counter = max_results counter = max_results
try:
results = fork_job(js_browser,'get_results', (url, timeout,), module_is_source_code=True)
except WorkerError as e:
raise Exception('Could not get results: %s'%e.orig_tb)
doc = html.fromstring(strip_encoding_declarations(results['result']))
for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka "]'): for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka "]'):
if counter <= 0: if counter <= 0:
break break
@ -92,13 +67,35 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
s.detail_item = id.strip() s.detail_item = id.strip()
s.formats = formats s.formats = formats
if 'DRM' in formats:
s.drm = SearchResult.DRM_LOCKED
counter -= 1 counter -= 1
s.drm = SearchResult.DRM_LOCKED if 'DRM' in formats else SearchResult.DRM_UNLOCKED
yield s yield s
class WoblinkStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/16/58/'
url = 'http://woblink.com/publication'
aff_url = aff_root + str(b64encode(url))
detail_url = None
if detail_item:
detail_url = aff_root + str(b64encode('http://woblink.com' + detail_item))
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url)))
else: else:
s.drm = SearchResult.DRM_UNLOCKED d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
counter -= 1 def search(self, query, max_results=10, timeout=60):
for s in search(query, max_results, timeout):
yield s yield s
if __name__ == '__main__':
    # Manual smoke test: run one sample query through the module-level
    # search() and dump every result it yields.
    from pprint import pprint
    found = list(search('Franciszek'))
    pprint(found)