Update Philosophy Now

This commit is contained in:
Kovid Goyal 2022-06-16 11:56:39 +05:30
parent c0a2656cb2
commit 211efd35d1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -1,12 +1,12 @@
import re from calibre.web.feeds.news import BasicNewsRecipe, classes
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre import browser
from collections import OrderedDict from collections import OrderedDict
class PhilosophyNow(BasicNewsRecipe): class PhilosophyNow(BasicNewsRecipe):
title = 'Philosophy Now' title = 'Philosophy Now'
__author__ = 'Rick Shang' __author__ = 'unkn0wn'
description = '''Philosophy Now is a lively magazine for everyone description = '''Philosophy Now is a lively magazine for everyone
interested in ideas. It isn't afraid to tackle all the major questions of interested in ideas. It isn't afraid to tackle all the major questions of
life, the universe and everything. Published every two months, it tries to life, the universe and everything. Published every two months, it tries to
@ -15,65 +15,64 @@ class PhilosophyNow(BasicNewsRecipe):
reading matter for those already ensnared by the muse, such as philosophy reading matter for those already ensnared by the muse, such as philosophy
students and academics.''' students and academics.'''
language = 'en' language = 'en'
category = 'news' use_embedded_content = False
encoding = 'UTF-8'
keep_only_tags = [dict(attrs={'id': 'fullMainColumn'})]
remove_tags = [dict(attrs={'class': 'articleTools'})]
no_javascript = True
no_stylesheets = True no_stylesheets = True
needs_subscription = True remove_javascript = True
remove_attributes = ['height', 'width', 'style']
encoding = 'utf-8'
ignore_duplicate_articles = {'url'}
def get_browser(self): keep_only_tags = [classes('article_page')]
br = BasicNewsRecipe.get_browser(self) remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
br.open('https://philosophynow.org/auth/login')
br.select_form(name="loginForm")
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def parse_index(self): def parse_index(self):
# Go to the issue soup = self.index_to_soup('https://philosophynow.org/')
soup0 = self.index_to_soup('http://philosophynow.org/') div = soup.find('div', attrs={'id': 'aside_issue_cover'})
issue = soup0.find('div', attrs={'id': 'navColumn'}) url = div.find('a', href=True)['href']
for issue in div.findAll('div', attrs={'id':'aside_issue_text'}):
# Find date & cover self.log('Downloading issue:', self.tag_to_string(issue).strip())
cover = issue.find('div', attrs={'id': 'cover'}) cov_url = div.find('img', src=True)['src']
date = self.tag_to_string(cover.find('h3')).strip() self.cover_url = 'https://philosophynow.org' + cov_url
self.timefmt = u' [%s]' % date soup = self.index_to_soup('https://philosophynow.org' + url)
img = cover.find('img', src=True)['src']
self.cover_url = 'http://philosophynow.org' + \
re.sub('medium', 'large', img)
issuenum = re.sub('/media/images/covers/medium/issue', '', img)
issuenum = re.sub('.jpg', '', issuenum)
# Go to the main body
current_issue_url = 'http://philosophynow.org/issues/' + issuenum
soup = self.index_to_soup(current_issue_url)
div = soup.find('div', attrs={'class': 'contentsColumn'})
feeds = OrderedDict() feeds = OrderedDict()
for post in div.findAll('h1'): for h2 in soup.findAll('h2', attrs={'class':'article_list_title'}):
articles = [] articles = []
a = post.find('a', href=True) a = h2.find('a', href=True)
if a is not None: url = a['href']
url = "http://philosophynow.org" + a['href'] url = 'https://philosophynow.org' + url
title = self.tag_to_string(a).strip() title = self.tag_to_string(a)
s = post.findPrevious('h3') des = h2.find_next_sibling('p')
section_title = self.tag_to_string(s).strip() if des:
d = post.findNext('h2') desc = self.tag_to_string(des)
desc = self.tag_to_string(d).strip() h3 = h2.find_previous_sibling('h3')
articles.append({'title': title, 'url': url, section_title = self.tag_to_string(h3).title()
'description': desc, 'date': ''}) self.log('\t', title)
self.log('\t', desc)
self.log('\t\t', url)
articles.append({
'title': title,
'url': url,
'description': desc})
if articles: if articles:
if section_title not in feeds: if section_title not in feeds:
feeds[section_title] = [] feeds[section_title] = []
feeds[section_title] += articles feeds[section_title] += articles
ans = [(key, val) for key, val in feeds.items()] ans = [(key, val) for key, val in feeds.items()]
return ans return ans
def cleanup(self): # PN changes the content it delivers based on cookies, so the
self.browser.open('http://philosophynow.org/auth/logout') # following ensures that we send no cookies
# Hand back the recipe object itself in place of a separate browser object.
# Any positional or keyword arguments are accepted and ignored.
# NOTE(review): presumably callers then invoke open()/open_novisit() on the
# returned object, which would land on the methods defined on this class --
# confirm against the calibre fetch code.
def get_browser(self, *args, **kwargs):
return self
# Return whatever get_browser() returns; the arguments passed in (e.g. a
# browser to clone) are accepted and ignored.
def clone_browser(self, *args, **kwargs):
return self.get_browser()
# Perform a fetch with a newly constructed browser() on every call, forwarding
# all arguments to that browser's open_novisit() and returning its result.
# NOTE(review): presumably a fresh browser starts with no stored cookies, so
# nothing is carried over between requests -- confirm against calibre.browser.
def open_novisit(self, *args, **kwargs):
br = browser()
return br.open_novisit(*args, **kwargs)
# Expose the same function under the name `open` as well.
open = open_novisit