Fix #6401 (Updated Recipe: Nature News)

This commit is contained in:
Kovid Goyal 2010-08-04 08:24:13 -06:00
parent 01298aeb91
commit 72fe5ee438

View File

@@ -4,28 +4,23 @@ import re
class NatureNews(BasicNewsRecipe): class NatureNews(BasicNewsRecipe):
title = u'Nature News' title = u'Nature News'
language = 'en' language = 'en'
__author__ = 'Krittika Goyal' __author__ = 'Krittika Goyal, Starson17'
oldest_article = 31 #days oldest_article = 31 #days
remove_empty_feeds = True
max_articles_per_feed = 50 max_articles_per_feed = 50
#encoding = 'latin1'
no_stylesheets = True no_stylesheets = True
remove_tags_before = dict(name='h1', attrs={'class':'heading entry-title'}) remove_tags_before = dict(name='h1', attrs={'class':'heading entry-title'})
remove_tags_after = dict(name='h2', attrs={'id':'comments'}) remove_tags_after = dict(name='h2', attrs={'id':'comments'})
remove_tags = [ remove_tags = [
#dict(name='iframe'),
#dict(name='div', attrs={'class':['pt-box-title', 'pt-box-content']}),
#dict(name='div', attrs={'id':['block-td_search_160', 'block-cam_search_160']}),
dict(name='h2', attrs={'id':'comments'}), dict(name='h2', attrs={'id':'comments'}),
dict(name='ul', attrs={'class':'toolsmenu xoxo'}), dict(attrs={'alt':'Advertisement'}),
dict(name='div', attrs={'class':'ad'}),
] ]
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: '') (re.compile(r'<p>ADVERTISEMENT</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
] ]
feeds = [('Nature News', 'http://feeds.nature.com/news/rss/most_recent')] feeds = [('Nature News', 'http://feeds.nature.com/news/rss/most_recent')]
def get_article_url(self, article):
return article.get('id')