IGN:Various improved recipes

This commit is contained in:
Kovid Goyal 2009-11-18 07:14:15 -07:00
parent 2ce5dec5ee
commit 05bc40b53f
3 changed files with 66 additions and 17 deletions

View File

@ -6,7 +6,7 @@ class HBR(BasicNewsRecipe):
title = 'Harvard Business Review' title = 'Harvard Business Review'
description = 'To subscribe go to http://hbr.harvardbusiness.org' description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True needs_subscription = True
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal and Sujata Raman'
timefmt = ' [%B %Y]' timefmt = ' [%B %Y]'
no_stylesheets = True no_stylesheets = True

View File

@ -12,20 +12,29 @@ from calibre.web.feeds.news import BasicNewsRecipe
class KellogInsight(BasicNewsRecipe): class KellogInsight(BasicNewsRecipe):
title = 'Kellog Insight' title = 'Kellog Insight'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal and Sujata Raman'
description = 'Articles from the Kellog School of Management' description = 'Articles from the Kellog School of Management'
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
language = 'en' language = 'en'
oldest_article = 60 oldest_article = 60
remove_tags_before = {'name':'h1'}
remove_tags_after = {'class':'col-two-text'}
keep_only_tags = [dict(name='div', attrs={'id':['print_no_comments']})]
remove_tags = [dict(name='div', attrs={'class':'col-three'})]
feeds = [('Articles', extra_css = '''
'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')] h1{font-family:arial; font-size:medium; color:#333333;}
.col-one{font-family:arial; font-size:xx-small;}
.col-two{font-family:arial; font-size:x-small; }
h2{font-family:arial; font-size:small; color:#666666;}
h3{font-family:arial; font-size:small; color:#333333;text-transform: uppercase; font-weight:normal;}
h4{color:#660000;font-family:arial; font-size:x-small;}
.col-two-text{font-family:arial; font-size:x-small; color:#333333;}
'''
feeds = [('Articles', 'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
def get_article_url(self, article): def get_article_url(self, article):
# Get only article not blog links # Get only article not blog links
@ -34,3 +43,11 @@ class KellogInsight(BasicNewsRecipe):
return link return link
self.log('Skipping non-article', link) self.log('Skipping non-article', link)
return None return None
def preprocess_html(self, soup):
    """Retag the node that follows each <span> as an <h4>.

    Presumably this lets the ``h4`` rule in ``extra_css`` style those
    elements -- confirm against the site's markup.

    :param soup: parsed article; must provide ``findAll``.
    :return: the same ``soup`` object, modified in place.
    """
    for span in soup.findAll(name=['span']):
        follower = span.nextSibling
        # A span that is the last child has no next sibling; skip it
        # instead of raising AttributeError on None.
        if follower is not None:
            follower.name = 'h4'
    return soup

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Sciencenews(BasicNewsRecipe): class Sciencenews(BasicNewsRecipe):
title = u'ScienceNews' title = u'ScienceNews'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic and Sujata Raman'
description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News." description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
oldest_article = 30 oldest_article = 30
language = 'en' language = 'en'
@ -19,11 +19,43 @@ class Sciencenews(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
timefmt = ' [%A, %d %B, %Y]' timefmt = ' [%A, %d %B, %Y]'
extra_css = '''
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
.content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
.content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
.exclusive{color:#FF0000 ;}
.anonymous{color:#14487E ;}
.content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
.description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ] keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'}) remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
remove_tags = [ remove_tags = [
dict(name='ul', attrs={'id':'content_functions_bottom'}) dict(name='ul', attrs={'id':'content_functions_bottom'})
,dict(name='div', attrs={'id':'content_functions_top'}) ,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
,dict(name='img', attrs={'class':'icon'})
,dict(name='div', attrs={'class': 'embiggen'})
] ]
feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')] feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
def get_cover_url(self):
    """Return the URL of the current issue's cover image, or None.

    Loads the Science News home page via ``self.index_to_soup`` and looks
    for an ``<img>`` whose ``alt`` attribute is "issue". The cover URL is
    the site root plus the image's ``src`` with '.jpg' appended.

    :return: the cover URL string, or None when no such image is found.
    """
    index = 'http://www.sciencenews.org/view/home'
    soup = self.index_to_soup(index)
    link_item = soup.find(name='img', alt="issue")
    if not link_item:
        return None
    return 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
def preprocess_html(self, soup):
    """Turn every <span> in the article into a <div>.

    :param soup: parsed article; must provide ``findAll``.
    :return: the same ``soup`` object, modified in place.
    """
    spans = soup.findAll(name=['span'])
    for span in spans:
        span.name = 'div'
    return soup