mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
77 lines
3.2 KiB
Python
77 lines
3.2 KiB
Python
#!/usr/bin/env python
|
|
|
|
__license__ = 'GPL v3'
|
|
'''
|
|
sciencenews.org
|
|
'''
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class ScienceNewsIssue(BasicNewsRecipe):
    '''Recipe that downloads recent issues of Science News (sciencenews.org).

    Articles come from the "issues" RSS feed listed in ``feeds``. The
    class overrides ``get_cover_url`` to locate the issue cover on the
    site's home page and ``preprocess_html`` to turn ``span`` tags into
    ``div`` tags.
    '''

    title = u'Science News Recent Issues'
    __author__ = u'Darko Miletic, Sujata Raman and Starson17'
    description = u'''Science News is an award-winning weekly
newsmagazine covering the most important research in all fields of science.
Its 16 pages each week are packed with short, accurate articles that appeal
to both general readers and scientists. Published since 1922, the magazine
now reaches about 150,000 subscribers and more than 1 million readers.
These are the latest News Items from Science News. This recipe downloads
the last 30 days worth of articles.'''
    category = u'Science, Technology, News'
    publisher = u'Society for Science & the Public'
    # Matches the "last 30 days" promised in the description above.
    oldest_article = 30
    language = 'en'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    timefmt = ' [%A, %d %B, %Y]'
    # Links found in articles are followed one level deep; which links
    # qualify is decided by match_regexps below.
    recursions = 1
    remove_attributes = ['style']

    conversion_options = {
        'linearize_tables': True,
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    extra_css = '''
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
.content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
.content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
.exclusive{color:#FF0000 ;}
.anonymous{color:#14487E ;}
.content_content{font-family:helvetica,arial ;font-size: medium ; color:#000000;}
.description{color:#585858;font-family:helvetica,arial ;font-size: large ;}
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''

    # Page clean-up: keep only the main column, cut everything after the
    # bottom function bar, then drop the remaining navigation/decoration.
    keep_only_tags = [dict(name='div', attrs={'id': 'column_action'})]
    remove_tags_after = dict(
        name='ul', attrs={'id': 'content_functions_bottom'})
    remove_tags = [
        dict(name='ul', attrs={'id': 'content_functions_bottom'}),
        dict(name='div', attrs={
            'id': ['content_functions_top', 'breadcrumb_content']}),
        dict(name='img', attrs={'class': 'icon'}),
        dict(name='div', attrs={'class': 'embiggen'}),
    ]

    feeds = [(u"Science News Current Issues",
              u'http://www.sciencenews.org/view/feed/type/edition/name/issues.rss')]

    # Dots are escaped so that only the literal host name matches; an
    # unescaped "." would match any character.
    match_regexps = [
        r'www\.sciencenews\.org/view/feature/id/',
        r'www\.sciencenews\.org/view/generic/id'
    ]

    def get_cover_url(self):
        '''Return the URL of the issue cover image, or None if not found.

        The site's home page is fetched and searched for an ``img`` tag
        whose ``alt`` attribute is "issue". The cover URL is the site
        root plus that tag's ``src`` value plus ``.jpg``.
        '''
        soup = self.index_to_soup('http://www.sciencenews.org/view/home')
        link_item = soup.find(name='img', alt="issue")
        # A matching tag without a src attribute is treated the same as
        # no match at all, rather than raising KeyError.
        src = link_item.get('src') if link_item else None
        if not src:
            return None
        return 'http://www.sciencenews.org' + src + '.jpg'

    def preprocess_html(self, soup):
        '''Rename every ``span`` tag in *soup* to ``div`` and return *soup*.'''
        for tag in soup.findAll(name=['span']):
            tag.name = 'div'
        return soup
|