Update Science News

This commit is contained in:
Kovid Goyal 2013-02-25 10:07:21 +05:30
parent 458209bbf9
commit 9d8a89d6e5

View File

@@ -1,24 +1,38 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
''' '''
sciencenews.org sciencenews.org
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Sciencenews(BasicNewsRecipe): class ScienceNewsIssue(BasicNewsRecipe):
title = u'ScienceNews' title = u'Science News Recent Issues'
__author__ = u'Darko Miletic and Sujata Raman' __author__ = u'Darko Miletic, Sujata Raman and Starson17'
description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News." description = u'''Science News is an award-winning weekly
newsmagazine covering the most important research in all fields of science.
Its 16 pages each week are packed with short, accurate articles that appeal
to both general readers and scientists. Published since 1922, the magazine
now reaches about 150,000 subscribers and more than 1 million readers.
These are the latest News Items from Science News. This recipe downloads
the last 30 days worth of articles.'''
category = u'Science, Technology, News'
publisher = u'Society for Science & the Public'
oldest_article = 30 oldest_article = 30
language = 'en' language = 'en'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
auto_cleanup = True
timefmt = ' [%A, %d %B, %Y]' timefmt = ' [%A, %d %B, %Y]'
recursions = 1
remove_attributes = ['style']
conversion_options = {'linearize_tables' : True
, 'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
extra_css = ''' extra_css = '''
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;} .content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
@@ -27,36 +41,33 @@ class Sciencenews(BasicNewsRecipe):
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;} .content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
.exclusive{color:#FF0000 ;} .exclusive{color:#FF0000 ;}
.anonymous{color:#14487E ;} .anonymous{color:#14487E ;}
.content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;} .content_content{font-family:helvetica,arial ;font-size: medium ; color:#000000;}
.description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;} .description{color:#585858;font-family:helvetica,arial ;font-size: large ;}
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;} .credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
''' '''
#keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ] keep_only_tags = [ dict(name='div', attrs={'class':'content_content'}),
#remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'}) dict(name='ul', attrs={'id':'toc'})
#remove_tags = [ ]
#dict(name='ul', attrs={'id':'content_functions_bottom'})
#,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
#,dict(name='img', attrs={'class':'icon'})
#,dict(name='div', attrs={'class': 'embiggen'})
#]
feeds = [(u"Science News / News Items", u'http://sciencenews.org/index.php/feed/type/news/name/news.rss/view/feed/name/all.rss')] feeds = [(u"Science News Current Issues", u'http://www.sciencenews.org/view/feed/type/edition/name/issues.rss')]
match_regexps = [
r'www.sciencenews.org/view/feature/id/',
r'www.sciencenews.org/view/generic/id'
]
def get_cover_url(self): def get_cover_url(self):
cover_url = None cover_url = None
index = 'http://www.sciencenews.org/view/home' index = 'http://www.sciencenews.org/view/home'
soup = self.index_to_soup(index) soup = self.index_to_soup(index)
link_item = soup.find(name = 'img',alt = "issue") link_item = soup.find(name = 'img',alt = "issue")
print link_item
if link_item: if link_item:
cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg' cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
return cover_url return cover_url
#def preprocess_html(self, soup): def preprocess_html(self, soup):
for tag in soup.findAll(name=['span']):
#for tag in soup.findAll(name=['span']): tag.name = 'div'
#tag.name = 'div' return soup
#return soup