mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
101 lines
3.5 KiB
Python
101 lines
3.5 KiB
Python
#!/usr/bin/env python
|
|
# vim:fileencoding=utf-8
|
|
from collections import OrderedDict
|
|
|
|
from calibre import browser
|
|
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
|
|
|
|
|
class PhilosophyNow(BasicNewsRecipe):
    """Recipe that builds an e-book from one issue of Philosophy Now.

    By default the issue linked from the site's home page is downloaded.
    Setting the recipe-specific option ``issue`` to an issue number (for
    example ``136``) downloads that issue instead.
    """

    title = 'Philosophy Now'
    __author__ = 'unkn0wn'
    # NOTE: continuation lines of the literal are deliberately left flush so
    # that no extra whitespace ends up inside the description text.
    description = '''Philosophy Now is a lively magazine for everyone
interested in ideas. It isn't afraid to tackle all the major questions of
life, the universe and everything. Published every two months, it tries to
corrupt innocent citizens by convincing them that philosophy can be
exciting, worthwhile and comprehensible, and also to provide some enjoyable
reading matter for those already ensnared by the muse, such as philosophy
students and academics.'''
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['height', 'width', 'style']
    encoding = 'utf-8'
    ignore_duplicate_articles = {'url'}
    masthead_url = 'https://philosophynow.org/media/images/regulars/logoStructuredData.png'

    # Keep only the article container and drop the welcome box inside it.
    keep_only_tags = [classes('article_page')]
    remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
    extra_css = '''
img {display:block; margin:0 auto;}
.articleImageCaption { font-size:small; text-align:center; }
em, blockquote { color:#202020; }
'''

    recipe_specific_options = {
        'issue': {
            'short': 'Enter the Issue Number you want to download ',
            'long': 'For example, 136'
        }
    }

    def parse_index(self):
        """Return the issue's articles grouped by section.

        Side effects: sets ``self.cover_url`` from the issue's cover image
        and ``self.timefmt`` from the text of the issue page's first
        ``<h1>``.

        Returns:
            A list of ``(section_title, articles)`` tuples in page order,
            where each article is a dict with the keys ``title``, ``url``
            and ``description``.
        """
        issue = self.recipe_specific_options.get('issue')
        if issue and isinstance(issue, str):
            # An explicit issue number was supplied: go straight to it, no
            # need to fetch the home page at all.
            url = 'https://philosophynow.org/issues/' + issue
        else:
            # Otherwise follow the cover link shown on the home page.
            soup = self.index_to_soup('https://philosophynow.org/')
            div = soup.find('div', attrs={'id': 'aside_issue_cover'})
            url = 'https://philosophynow.org' + div.find('a', href=True)['href']

        soup = self.index_to_soup(url)

        div = soup.find('div', attrs={'id': 'issue_contents_cover_div'})
        cov_url = div.find('img', src=True)['src']
        self.cover_url = 'https://philosophynow.org' + cov_url
        self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']'

        feeds = OrderedDict()

        for h2 in soup.findAll('h2', attrs={'class':'article_list_title'}):
            a = h2.find('a', href=True)
            if a is None:
                # A title heading without a link cannot be downloaded.
                continue
            url = 'https://philosophynow.org' + a['href']
            title = self.tag_to_string(a)

            # The summary paragraph is optional. Default to an empty string
            # so a missing one neither raises NameError nor reuses the
            # previous article's description.
            des = h2.find_next_sibling('p')
            desc = self.tag_to_string(des) if des else ''

            # The nearest preceding <h3> names the section of this article.
            h3 = h2.find_previous_sibling('h3')
            section_title = self.tag_to_string(h3).title()

            self.log('\t', title)
            self.log('\t', desc)
            self.log('\t\t', url)

            feeds.setdefault(section_title, []).append({
                'title': title,
                'url': url,
                'description': desc})

        return list(feeds.items())

    # PN changes the content it delivers based on cookies, so the
    # following ensures that we send no cookies
    def get_browser(self, *args, **kwargs):
        """Return the recipe itself, which stands in for the browser."""
        return self

    def clone_browser(self, *args, **kwargs):
        """Return the same stand-in object that ``get_browser`` returns."""
        return self.get_browser()

    def open_novisit(self, *args, **kwargs):
        """Open a URL with a newly created browser, forwarding all arguments."""
        br = browser()
        return br.open_novisit(*args, **kwargs)

    # ``open`` behaves exactly like ``open_novisit``.
    open = open_novisit
|