This commit is contained in:
Kovid Goyal 2012-06-27 09:55:25 +05:30
parent 340303b9d6
commit 8459b02276

View File

@ -9,14 +9,18 @@ class NewStatesman(BasicNewsRecipe):
title = 'New Statesman' title = 'New Statesman'
language = 'en_GB' language = 'en_GB'
__author__ = "NotTaken" __author__ = "NotTaken"
description = "Britain's Current Affairs & Politics Magazine (Weekly)" description = "Britain's Current Affairs & Politics Magazine (bi-weekly)"
oldest_article = 7.0 oldest_article = 4.0
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True
keep_only_tags = [dict(attrs={'class' : 'node'})] keep_only_tags = [dict(attrs={'class' : 'node'})]
remove_tags_after = [ remove_tags_after = [
dict(attrs={'class' : lambda x: x and 'content123' in x}) dict(attrs={'class' : lambda x: x and 'content123' in x})
] ]
remove_tags = [ remove_tags = [
dict(attrs={'class' : lambda x: x and 'links_bookmark' in x}) dict(attrs={'class' : lambda x: x and 'links_bookmark' in x})
] ]
@ -34,6 +38,24 @@ class NewStatesman(BasicNewsRecipe):
} }
''' '''
processed_urls = []
def populate_article_metadata(self, article, soup, first):
    """Register the first image found in ``soup`` as the article's thumbnail.

    Does nothing unless ``first`` is truthy and this recipe object provides
    ``add_toc_thumbnail`` (NOTE(review): presumably absent on older calibre
    releases -- confirm).

    :param article: article object, passed through to ``add_toc_thumbnail``.
    :param soup: parsed page; only ``soup.find('img')`` is used.
    :param first: flag supplied by the caller; the image is only looked up
        when it is truthy.
    """
    if not first or not hasattr(self, 'add_toc_thumbnail'):
        return
    pic = soup.find('img')
    if pic is None:
        return
    # An <img> without a usable ``src`` would raise KeyError with ``pic['src']``
    # and gives nothing to use as a thumbnail, so skip it.
    src = pic.get('src')
    if src:
        self.add_toc_thumbnail(article, src)
def get_article_url(self, article):
    """Return the article's URL, or ``None`` if it was already returned once.

    The URL is resolved by ``BasicNewsRecipe.get_article_url``. Each URL handed
    out is remembered in ``self.processed_urls``; a repeat is logged and
    reported as ``None``.

    :param article: feed entry; its ``title`` is read only for the log message.
    :return: the resolved URL, or ``None`` when there is no URL or it is a
        duplicate of one seen earlier.
    """
    url = BasicNewsRecipe.get_article_url(self, article)
    if url is None:
        # No URL to de-duplicate on. Recording None would make every later
        # URL-less article be reported as a "duplicate".
        return None
    if url in self.processed_urls:
        self.log('skipping duplicate article: %s' % article.title)
        return None
    self.processed_urls.append(url)
    return url
feeds = [ feeds = [
(u'Politics', (u'Politics',
@ -54,6 +76,8 @@ class NewStatesman(BasicNewsRecipe):
u'http://www.newstatesman.com/world-affairs.rss'), u'http://www.newstatesman.com/world-affairs.rss'),
(u'Sci-Tech', (u'Sci-Tech',
u'http://www.newstatesman.com/feeds/topics/sci-tech.rss'), u'http://www.newstatesman.com/feeds/topics/sci-tech.rss'),
(u'Others',
u'http://www.newstatesman.com/feeds_allsite/site_feed.php'),
] ]