Updated Newsweek

This commit is contained in:
Kovid Goyal 2011-05-18 09:15:17 -06:00
parent 1eecc5270c
commit d882c28144

View File

@@ -11,6 +11,20 @@ class Newsweek(BasicNewsRecipe):
BASE_URL = 'http://www.newsweek.com' BASE_URL = 'http://www.newsweek.com'
# Section title -> tag-page path. newsweek_sections() appends each path to
# BASE_URL to build the full section URL.
# NOTE(review): this is a plain dict walked with iteritems(), so the order in
# which sections are produced is presumably not guaranteed -- confirm if the
# section order matters.
topics = {
'Culture' : '/tag/culture.html',
'Business' : '/tag/business.html',
'Society' : '/tag/society.html',
'Science' : '/tag/science.html',
'Education' : '/tag/education.html',
'Politics' : '/tag/politics.html',
'Health' : '/tag/health.html',
'World' : '/tag/world.html',
'Nation' : '/tag/nation.html',
'Technology' : '/tag/technology.html',
'Game Changers' : '/tag/game-changers.html',
}
keep_only_tags = dict(name='article', attrs={'class':'article-text'}) keep_only_tags = dict(name='article', attrs={'class':'article-text'})
remove_tags = [dict(attrs={'data-dartad':True})] remove_tags = [dict(attrs={'data-dartad':True})]
remove_attributes = ['property'] remove_attributes = ['property']
@@ -21,14 +35,10 @@ class Newsweek(BasicNewsRecipe):
return soup return soup
def newsweek_sections(self): def newsweek_sections(self):
-return [
-('Nation', 'http://www.newsweek.com/tag/nation.html'),
-('Society', 'http://www.newsweek.com/tag/society.html'),
-('Culture', 'http://www.newsweek.com/tag/culture.html'),
-('World', 'http://www.newsweek.com/tag/world.html'),
-('Politics', 'http://www.newsweek.com/tag/politics.html'),
-('Business', 'http://www.newsweek.com/tag/business.html'),
-]
+for topic_name, topic_url in self.topics.iteritems():
+yield (topic_name,
+self.BASE_URL+topic_url)
def newsweek_parse_section_page(self, soup): def newsweek_parse_section_page(self, soup):
for article in soup.findAll('article', about=True, for article in soup.findAll('article', about=True,