HBR: No longer use Qt WebKit

This commit is contained in:
Kovid Goyal 2016-04-25 14:33:54 +05:30
parent 83c4a67765
commit d46ff32eb2

View File

@@ -1,19 +1,23 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from css_selectors import Select from css_selectors import Select
from mechanize import Request
from urllib import urlencode
import json
class HBR(BasicNewsRecipe): class HBR(BasicNewsRecipe):
title = 'Harvard Business Review' title = 'Harvard Business Review'
description = 'To subscribe go to http://hbr.harvardbusiness.org' description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True needs_subscription = True
__author__ = 'Kovid Goyal and Sujata Raman' __author__ = 'Kovid Goyal'
timefmt = ' [%B %Y]' timefmt = ' [%B %Y]'
language = 'en' language = 'en'
no_stylesheets = True no_stylesheets = True
LOGIN_URL = 'https://hbr.org/login?request_url=/'
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
keep_only_tags = [ keep_only_tags = [
dict(attrs={'class':['article-hed', 'byline']}), dict(attrs={'class':['article-hed', 'byline']}),
dict(attrs={'class':lambda x: x and 'article' in x.split()}), dict(attrs={'class':lambda x: x and 'article' in x.split()}),
@@ -21,16 +25,22 @@ class HBR(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name='personalization-placement'), dict(name='personalization-placement'),
] ]
use_javascript_to_login = True
def javascript_login(self, br, username, password): def get_browser(self):
br.visit('https://hbr.org/sign-in') br = BasicNewsRecipe.get_browser(self)
br.run_for_a_time(15) # br.set_debug_http(True)
f = br.select_form('sign-in form') br.open('https://hbr.org/sign-in')
f['login-email'] = username rq = Request('https://hbr.org/authenticate', headers={
f['login-password'] = password 'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
br.submit('[js-target="submit-sign-in"]', wait_for_load=False) 'Referer': 'https://hbr.org/sign-in',
br.run_for_a_time(15) 'X-Requested-With': 'XMLHttpRequest',
}, data=urlencode({'username':self.username, 'password':self.password}))
r = br.open(rq)
raw = r.read()
data = json.loads(raw)
if data['code'] != 200 or data["message"] != "Authentication Successful":
raise ValueError('Failed to log in check username/password')
return br
def hbr_parse_toc(self, url): def hbr_parse_toc(self, url):
root = self.index_to_soup(url, as_tree=True) root = self.index_to_soup(url, as_tree=True)