# calibre/recipes/phillosophy_now.recipe
#
# 80 lines
# 3.0 KiB
# Plaintext

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from collections import OrderedDict
class PhilosophyNow(BasicNewsRecipe):
    """Calibre recipe that fetches the current issue of Philosophy Now.

    The recipe logs in with the subscriber's credentials, reads the cover
    image and issue number from the site's home page, and then builds one
    feed per section heading found on that issue's contents page.
    """

    title = 'Philosophy Now'
    __author__ = 'Rick Shang'
    description = '''Philosophy Now is a lively magazine for everyone
interested in ideas. It isn't afraid to tackle all the major questions of
life, the universe and everything. Published every two months, it tries to
corrupt innocent citizens by convincing them that philosophy can be
exciting, worthwhile and comprehensible, and also to provide some enjoyable
reading matter for those already ensnared by the muse, such as philosophy
students and academics.'''
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'
    keep_only_tags = [dict(attrs={'id': 'fullMainColumn'})]
    remove_tags = [dict(attrs={'class': 'articleTools'})]
    no_javascript = True
    no_stylesheets = True
    needs_subscription = True

    # Single scheme + host for every request. The login form is submitted
    # over HTTPS, so all later requests use HTTPS as well; otherwise the
    # session cookie may not accompany plain-HTTP requests.
    _BASE_URL = 'https://philosophynow.org'

    # Path prefix of the medium-sized cover image; what follows it (minus
    # the ".jpg" extension) is used as the issue number.
    _COVER_PREFIX = '/media/images/covers/medium/issue'

    def get_browser(self, *args, **kwargs):
        """Return a browser that has submitted the site's login form.

        Any arguments are forwarded unchanged to
        ``BasicNewsRecipe.get_browser`` so this override keeps working when
        the caller passes options to the base implementation.
        """
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        br.open(self._BASE_URL + '/auth/login')
        br.select_form(name="loginForm")
        br['username'] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def parse_index(self):
        """Build the list of feeds for the current issue.

        Side effects: sets ``self.timefmt`` from the issue date shown next
        to the cover and ``self.cover_url`` to the large cover image.

        Returns:
            A list of ``(section_title, articles)`` tuples in the order the
            sections first appear on the contents page. Each article is a
            dict with ``title``, ``url``, ``description`` and ``date`` keys.

        Raises:
            ValueError: if the cover block or the contents column cannot be
                found in the downloaded pages.
        """
        # Go to the issue
        home = self.index_to_soup(self._BASE_URL + '/')
        nav = home.find('div', attrs={'id': 'navColumn'})
        cover = None if nav is None else nav.find('div', attrs={'id': 'cover'})
        cover_img = None if cover is None else cover.find('img', src=True)
        if cover_img is None:
            raise ValueError(
                'Philosophy Now: could not find the current issue cover '
                'on the home page')

        # Find date & cover
        date = self.tag_to_string(cover.find('h3')).strip()
        self.timefmt = u' [%s]' % date
        img = cover_img['src']
        self.cover_url = self._BASE_URL + img.replace('medium', 'large')

        # The issue number is whatever sits between the cover path prefix
        # and the extension. The dot is escaped so that only a literal
        # ".jpg" is stripped, not any character followed by "jpg".
        issuenum = img.replace(self._COVER_PREFIX, '')
        issuenum = re.sub(r'\.jpg', '', issuenum)

        # Go to the main body
        current_issue_url = self._BASE_URL + '/issues/' + issuenum
        soup = self.index_to_soup(current_issue_url)
        contents = soup.find('div', attrs={'class': 'contentsColumn'})
        if contents is None:
            raise ValueError(
                'Philosophy Now: could not find the contents column at '
                + current_issue_url)

        # OrderedDict keeps sections in the order they are first seen.
        feeds = OrderedDict()
        for post in contents.findAll('h1'):
            a = post.find('a', href=True)
            if a is None:
                # Heading without a link: nothing to download.
                continue
            # The nearest preceding <h3> names the section; the nearest
            # following <h2> is used as the article description.
            section_title = self.tag_to_string(post.findPrevious('h3')).strip()
            desc = self.tag_to_string(post.findNext('h2')).strip()
            feeds.setdefault(section_title, []).append({
                'title': self.tag_to_string(a).strip(),
                'url': self._BASE_URL + a['href'],
                'description': desc,
                'date': '',
            })
        return list(feeds.items())

    def cleanup(self):
        """Log out of the site by requesting its logout URL."""
        self.browser.open(self._BASE_URL + '/auth/logout')