mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
HBR no longer use Qt WebKit
This commit is contained in:
parent
83c4a67765
commit
d46ff32eb2
@ -1,19 +1,23 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from css_selectors import Select
|
from css_selectors import Select
|
||||||
|
from mechanize import Request
|
||||||
|
from urllib import urlencode
|
||||||
|
import json
|
||||||
|
|
||||||
class HBR(BasicNewsRecipe):
|
class HBR(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Harvard Business Review'
|
title = 'Harvard Business Review'
|
||||||
description = 'To subscribe go to http://hbr.harvardbusiness.org'
|
description = 'To subscribe go to http://hbr.harvardbusiness.org'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
__author__ = 'Kovid Goyal and Sujata Raman'
|
__author__ = 'Kovid Goyal'
|
||||||
timefmt = ' [%B %Y]'
|
timefmt = ' [%B %Y]'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
LOGIN_URL = 'https://hbr.org/login?request_url=/'
|
|
||||||
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(attrs={'class':['article-hed', 'byline']}),
|
dict(attrs={'class':['article-hed', 'byline']}),
|
||||||
dict(attrs={'class':lambda x: x and 'article' in x.split()}),
|
dict(attrs={'class':lambda x: x and 'article' in x.split()}),
|
||||||
@ -21,16 +25,22 @@ class HBR(BasicNewsRecipe):
|
|||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='personalization-placement'),
|
dict(name='personalization-placement'),
|
||||||
]
|
]
|
||||||
use_javascript_to_login = True
|
|
||||||
|
|
||||||
def javascript_login(self, br, username, password):
|
def get_browser(self):
|
||||||
br.visit('https://hbr.org/sign-in')
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
br.run_for_a_time(15)
|
# br.set_debug_http(True)
|
||||||
f = br.select_form('sign-in form')
|
br.open('https://hbr.org/sign-in')
|
||||||
f['login-email'] = username
|
rq = Request('https://hbr.org/authenticate', headers={
|
||||||
f['login-password'] = password
|
'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
|
||||||
br.submit('[js-target="submit-sign-in"]', wait_for_load=False)
|
'Referer': 'https://hbr.org/sign-in',
|
||||||
br.run_for_a_time(15)
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
}, data=urlencode({'username':self.username, 'password':self.password}))
|
||||||
|
r = br.open(rq)
|
||||||
|
raw = r.read()
|
||||||
|
data = json.loads(raw)
|
||||||
|
if data['code'] != 200 or data["message"] != "Authentication Successful":
|
||||||
|
raise ValueError('Failed to log in check username/password')
|
||||||
|
return br
|
||||||
|
|
||||||
def hbr_parse_toc(self, url):
|
def hbr_parse_toc(self, url):
|
||||||
root = self.index_to_soup(url, as_tree=True)
|
root = self.index_to_soup(url, as_tree=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user