calibre/recipes/science_news_recent_issues.recipe
Kovid Goyal 29cd8d64ea
Change shebangs to python from python2
Also remove a few other miscellaneous references to python2
2020-08-22 18:47:51 +05:30

77 lines
3.2 KiB
Python

#!/usr/bin/env python
__license__ = 'GPL v3'
'''
sciencenews.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ScienceNewsIssue(BasicNewsRecipe):
    """Recipe that builds an e-book from the Science News "issues" RSS feed.

    Articles are taken from the feed listed in ``feeds``; linked pages are
    followed only when their URL matches one of ``match_regexps``.
    """

    # --- Metadata -----------------------------------------------------------
    title = u'Science News Recent Issues'
    __author__ = u'Darko Miletic, Sujata Raman and Starson17'
    # NOTE: continuation lines of this literal are kept flush-left so that no
    # indentation whitespace leaks into the description text.
    description = u'''Science News is an award-winning weekly
newsmagazine covering the most important research in all fields of science.
Its 16 pages each week are packed with short, accurate articles that appeal
to both general readers and scientists. Published since 1922, the magazine
now reaches about 150,000 subscribers and more than 1 million readers.
These are the latest News Items from Science News. This recipe downloads
the last 30 days worth of articles.'''
    category = u'Science, Technology, News'
    publisher = u'Society for Science & the Public'
    language = 'en'

    # --- Download behaviour -------------------------------------------------
    oldest_article = 30  # days; matches the "last 30 days" in the description
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    timefmt = ' [%A, %d %B, %Y]'
    recursions = 1
    remove_attributes = ['style']

    # The metadata declared above is reused for the conversion pipeline.
    conversion_options = {
        'linearize_tables': True,
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Styling ------------------------------------------------------------
    extra_css = '''
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
.content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
.content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
.exclusive{color:#FF0000 ;}
.anonymous{color:#14487E ;}
.content_content{font-family:helvetica,arial ;font-size: medium ; color:#000000;}
.description{color:#585858;font-family:helvetica,arial ;font-size: large ;}
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''

    # --- Page clean-up ------------------------------------------------------
    keep_only_tags = [dict(name='div', attrs={'id': 'column_action'})]
    remove_tags_after = dict(
        name='ul', attrs={'id': 'content_functions_bottom'})
    remove_tags = [
        dict(name='ul', attrs={'id': 'content_functions_bottom'}),
        dict(name='div', attrs={
            'id': ['content_functions_top', 'breadcrumb_content']}),
        dict(name='img', attrs={'class': 'icon'}),
        dict(name='div', attrs={'class': 'embiggen'}),
    ]

    # --- Sources ------------------------------------------------------------
    feeds = [
        (u"Science News Current Issues",
         u'http://www.sciencenews.org/view/feed/type/edition/name/issues.rss'),
    ]

    # Dots are escaped so that '.' only matches a literal dot in the host
    # name rather than any character.
    match_regexps = [
        r'www\.sciencenews\.org/view/feature/id/',
        r'www\.sciencenews\.org/view/generic/id',
    ]

    def get_cover_url(self):
        """Return the cover image URL for the current issue, or ``None``.

        The home page is searched for an ``<img>`` whose ``alt`` attribute is
        ``"issue"``. ``None`` is returned when no such image exists or when
        the image carries no ``src`` attribute.
        """
        site = 'http://www.sciencenews.org'
        soup = self.index_to_soup(site + '/view/home')
        cover_img = soup.find(name='img', alt="issue")
        if not cover_img:
            return None
        # Use .get() so an <img> without a src yields "no cover" instead of
        # raising KeyError.
        src = cover_img.get('src')
        if not src:
            return None
        # NOTE(review): the site's src value apparently lacks the file
        # extension, hence the appended '.jpg' — confirm against live markup.
        return site + src + '.jpg'

    def preprocess_html(self, soup):
        """Rename every ``<span>`` in *soup* to ``<div>`` and return *soup*.

        The tree is modified in place.
        """
        # NOTE(review): presumably done so the class-based rules in extra_css
        # apply to block-level elements — confirm.
        for span in soup.findAll(name=['span']):
            span.name = 'div'
        return soup