calibre/recipes/philosophy_now.recipe

#!/usr/bin/env python
# vim:fileencoding=utf-8
from collections import OrderedDict

from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe, classes


class PhilosophyNow(BasicNewsRecipe):
    title = 'Philosophy Now'
    __author__ = 'unkn0wn'
    description = '''Philosophy Now is a lively magazine for everyone
    interested in ideas. It isn't afraid to tackle all the major questions of
    life, the universe and everything. Published every two months, it tries to
    corrupt innocent citizens by convincing them that philosophy can be
    exciting, worthwhile and comprehensible, and also to provide some enjoyable
    reading matter for those already ensnared by the muse, such as philosophy
    students and academics.'''
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['height', 'width', 'style']
    encoding = 'utf-8'
    ignore_duplicate_articles = {'url'}
    masthead_url = 'https://philosophynow.org/media/images/regulars/logoStructuredData.png'

    keep_only_tags = [classes('article_page')]
    remove_tags = [dict(name='div', attrs={'id': 'welcome_box'})]

    extra_css = '''
        img {display:block; margin:0 auto;}
        .articleImageCaption { font-size:small; text-align:center; }
        em, blockquote { color:#202020; }
    '''
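
    # Optional user setting: download a specific back issue by its number
    # (for example, 136) instead of the latest issue.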
    recipe_specific_options = {
        'issue': {
            'short': 'Enter the Issue Number you want to download ',
            'long': 'For example, 136'
        }
    }
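
    # Build the issue's table of contents: locate the cover, the issue
    # title, and the article list grouped by section headings.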
    def parse_index(self):
        soup = self.index_to_soup('https://philosophynow.org/')
        # The front page links to the current issue's cover in the sidebar.
        div = soup.find('div', attrs={'id': 'aside_issue_cover'})
        url = 'https://philosophynow.org' + div.find('a', href=True)['href']

        # If the user asked for a specific back issue, fetch that instead.
        d = self.recipe_specific_options.get('issue')
        if d and isinstance(d, str):
            url = 'https://philosophynow.org/issues/' + d

        soup = self.index_to_soup(url)
        div = soup.find('div', attrs={'id': 'issue_contents_cover_div'})
        cov_url = div.find('img', src=True)['src']
        self.cover_url = 'https://philosophynow.org' + cov_url
        self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']'

        # Group articles into sections using the h3 heading that precedes
        # each article title.
        feeds = OrderedDict()

        for h2 in soup.findAll('h2', attrs={'class': 'article_list_title'}):
            articles = []
            a = h2.find('a', href=True)
            url = 'https://philosophynow.org' + a['href']
            title = self.tag_to_string(a)
            desc = ''
            des = h2.find_next_sibling('p')
            if des:
                desc = self.tag_to_string(des)
            h3 = h2.find_previous_sibling('h3')
            section_title = self.tag_to_string(h3).title()
            self.log('\t', title)
            self.log('\t', desc)
            self.log('\t\t', url)
            articles.append({
                'title': title,
                'url': url,
                'description': desc})
            if articles:
                if section_title not in feeds:
                    feeds[section_title] = []
                feeds[section_title] += articles
        return list(feeds.items())
    # PN changes the content it delivers based on cookies, so the
    # following ensures that we send no cookies
    def get_browser(self, *args, **kwargs):
        return self

    def clone_browser(self, *args, **kwargs):
        return self.get_browser()

    def open_novisit(self, *args, **kwargs):
        br = browser()
        return br.open_novisit(*args, **kwargs)

    open = open_novisit
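

# A minimal way to try this recipe from the command line (assuming the
# calibre command line tools are installed) is to compile it directly:
#   ebook-convert philosophy_now.recipe .epub --test
# The 'issue' option defined above can also be supplied when fetching;
# check ebook-convert --help for the exact flag in your calibre version.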