IGN:Various improved recipes

This commit is contained in:
Kovid Goyal 2009-11-18 07:14:15 -07:00
parent 2ce5dec5ee
commit 05bc40b53f
3 changed files with 66 additions and 17 deletions

View File

@ -6,10 +6,10 @@ class HBR(BasicNewsRecipe):
title = 'Harvard Business Review'
description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True
__author__ = 'Kovid Goyal'
__author__ = 'Kovid Goyal and Sujata Raman'
timefmt = ' [%B %Y]'
no_stylesheets = True
no_stylesheets = True
LOGIN_URL = 'http://hbr.harvardbusiness.org/login?request_url=/'
INDEX = 'http://hbr.harvardbusiness.org/current'
@ -20,14 +20,14 @@ class HBR(BasicNewsRecipe):
'contentRight', 'summaryLink']),
dict(name='form'),
]
extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
#articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
'''
def get_browser(self):
@ -100,10 +100,10 @@ class HBR(BasicNewsRecipe):
index = 'http://hbr.harvardbusiness.org/current'
soup = self.index_to_soup(index)
link_item = soup.find('img', alt=re.compile("HBR Cover Image"), src=True)
if link_item:
cover_url = 'http://hbr.harvardbusiness.org' + link_item['src']
return cover_url

View File

@ -12,20 +12,29 @@ from calibre.web.feeds.news import BasicNewsRecipe
class KellogInsight(BasicNewsRecipe):
title = 'Kellog Insight'
__author__ = 'Kovid Goyal'
__author__ = 'Kovid Goyal and Sujata Raman'
description = 'Articles from the Kellog School of Management'
no_stylesheets = True
encoding = 'utf-8'
language = 'en'
oldest_article = 60
remove_tags_before = {'name':'h1'}
remove_tags_after = {'class':'col-two-text'}
keep_only_tags = [dict(name='div', attrs={'id':['print_no_comments']})]
remove_tags = [dict(name='div', attrs={'class':'col-three'})]
feeds = [('Articles',
'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
extra_css = '''
h1{font-family:arial; font-size:medium; color:#333333;}
.col-one{font-family:arial; font-size:xx-small;}
.col-two{font-family:arial; font-size:x-small; }
h2{font-family:arial; font-size:small; color:#666666;}
h3{font-family:arial; font-size:small; color:#333333;text-transform: uppercase; font-weight:normal;}
h4{color:#660000;font-family:arial; font-size:x-small;}
.col-two-text{font-family:arial; font-size:x-small; color:#333333;}
'''
feeds = [('Articles', 'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
def get_article_url(self, article):
# Get only article not blog links
@ -34,3 +43,11 @@ class KellogInsight(BasicNewsRecipe):
return link
self.log('Skipping non-article', link)
return None
def preprocess_html(self, soup):
    """Rename the node that follows each ``<span>`` to ``h4``.

    For every ``<span>`` found in *soup*, the span's next sibling has its
    ``name`` set to ``'h4'`` (the recipe's ``extra_css`` carries an ``h4``
    rule). Spans without a next sibling are left alone.

    :param soup: parsed article document exposing ``findAll``.
    :return: the same *soup* object, modified in place.
    """
    for span in soup.findAll(name=['span']):
        follower = span.nextSibling
        # A span that is the last child of its parent has no next sibling
        # (None); assigning ``.name`` on None would raise AttributeError
        # and abort processing of the whole article.
        if follower is not None:
            follower.name = 'h4'
    return soup

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Sciencenews(BasicNewsRecipe):
title = u'ScienceNews'
__author__ = u'Darko Miletic'
__author__ = u'Darko Miletic and Sujata Raman'
description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
oldest_article = 30
language = 'en'
@ -17,13 +17,45 @@ class Sciencenews(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
timefmt = ' [%A, %d %B, %Y]'
timefmt = ' [%A, %d %B, %Y]'
extra_css = '''
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
.content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
.content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
.exclusive{color:#FF0000 ;}
.anonymous{color:#14487E ;}
.content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
.description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
remove_tags = [
dict(name='ul', attrs={'id':'content_functions_bottom'})
,dict(name='div', attrs={'id':'content_functions_top'})
,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
,dict(name='img', attrs={'class':'icon'})
,dict(name='div', attrs={'class': 'embiggen'})
]
feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
def get_cover_url(self):
    """Return the URL of the current issue's cover image, or None.

    Loads the Science News home page through ``self.index_to_soup`` and
    looks for an ``<img>`` whose ``alt`` attribute is ``"issue"``. When one
    is found, the cover URL is the site root joined with the image's
    ``src`` plus a ``.jpg`` suffix; otherwise None is returned.
    """
    index = 'http://www.sciencenews.org/view/home'
    soup = self.index_to_soup(index)
    # src=True restricts the match to images that actually carry a src
    # attribute, so the lookup below cannot raise KeyError.
    link_item = soup.find(name='img', alt="issue", src=True)
    if not link_item:
        return None
    return 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
def preprocess_html(self, soup):
    """Rename every ``<span>`` in *soup* to ``<div>`` and return *soup*.

    The tags are modified in place; the object handed in is the object
    returned.
    """
    spans = soup.findAll(name=['span'])
    for element in spans:
        element.name = 'div'
    return soup