diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe deleted file mode 100644 index 226b783639..0000000000 --- a/recipes/hbr.recipe +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# License: GPLv3 Copyright: 2016, Kovid Goyal - -from calibre.web.feeds.news import BasicNewsRecipe -from css_selectors import Select -from mechanize import Request -import json -try: - from urllib.parse import urlencode -except ImportError: - from urllib import urlencode - - -class HBR(BasicNewsRecipe): - - title = 'Harvard Business Review' - description = 'To subscribe go to http://hbr.harvardbusiness.org' - needs_subscription = True - __author__ = 'Kovid Goyal' - timefmt = ' [%B %Y]' - language = 'en' - no_stylesheets = True - - keep_only_tags = [ - dict(attrs={'class': ['article-hed', 'byline']}), - dict(attrs={'class': lambda x: x and 'article' in x.split()}), - ] - remove_tags = [ - dict(name='personalization-placement'), - ] - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - # br.set_debug_http(True) - br.open('https://hbr.org/sign-in') - rq = Request('https://hbr.org/authenticate', headers={ - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Referer': 'https://hbr.org/sign-in', - 'Origin': 'https://hbr.org', - 'X-Requested-With': 'XMLHttpRequest', - 'Accept': 'application/json, text/javascript, */*; q=0.01', - }, data=urlencode({'username': self.username, 'password': self.password})) - r = br.open(rq) - raw = r.read() - data = json.loads(raw) - if data['code'] != 200 or data["message"] != "Authentication Successful.": - raise ValueError('Failed to log in check username/password') - return br - - def hbr_parse_toc(self, url): - root = self.index_to_soup(url, as_tree=True) - select = Select(root) - section = 'Unknown' - articles = [] - feeds = [] - toc = next( - select('stream-content[data-stream-name="table-of-contents"] ul')) - for x in toc.xpath('descendant::*[local-name()="h4" or local-name()="stream-item"]'): - if 'h4' in x.tag: - if articles: - feeds.append((section, articles)) - section = self.tag_to_string(x) - articles = [] - self.log('Found section:', section) - else: - title, url = x.get('data-title'), x.get('data-url') - desc = ''.join(c.tail or '' for c in x).strip() - authors = x.get('data-authors') - if authors: - desc = 'by ' + authors + ': ' + desc - self.log('\tFound article:', title, url, desc) - articles.append( - {'title': title, 'url': 'https://hbr.org' + url, 'description': desc}) - if articles: - feeds.append((section, articles)) - return feeds - - def parse_index(self): - soup = self.index_to_soup('http://hbr.org/magazine') - fig = soup.find('figure', attrs={ - 'class': lambda x: x and 'current-issue' in x.split()}) - url = 'https://hbr.org' + fig.find('a', href=True)['href'] - img = fig.find('img') - self.cover_url = 'https://hbr.org' + img['src'] - self.timefmt = img['alt'] - return self.hbr_parse_toc(url)