Fix Nature news.

This commit is contained in:
Adrian Fiergolski 2016-03-30 19:37:16 +02:00
parent 9235097db2
commit 7f2e6088b8

View File

@ -3,13 +3,14 @@ from calibre.web.feeds.news import BasicNewsRecipe
class NatureNews(BasicNewsRecipe):
title = u'Nature News'
language = 'en'
__author__ = 'Krittika Goyal, Starson17'
__author__ = 'Krittika Goyal, Starson17, adrianf0'
oldest_article = 31 #days
remove_empty_feeds = True
max_articles_per_feed = 50
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
extra_css = '''
.author { text-align: right; font-size: small; line-height:1em; margin-top:0px; margin-left:0; margin-right:0; margin-bottom: 0; }
@ -17,5 +18,22 @@ class NatureNews(BasicNewsRecipe):
.imagecredit { font-size: x-small; font-style: normal; font-weight: bold}
'''
feeds = [('Nature News', 'http://feeds.nature.com/news/rss/most_recent')]
feeds = [(u'Nature News', 'http://feeds.nature.com/NatureNewsComment')] #News and comments
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'top-row'}))
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'authors cleared'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'cleared article-tools extra'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'related-stories-box box'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'related-links'}))
remove_tags.append(dict(name = 'p', attrs = {'class' : 'not-logged-in'}))
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'endnotes'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'author-details-below'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'references'}))
remove_tags.append(dict(name = 'a', attrs = {'class' : 'rss-link'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'comment-avatar'}))
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'moderation'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'img img-right'})) #links to other articles, main content is img-middle
remove_tags.append(dict(name = 'div', attrs = {'class' : 'pullquote pullquote-left'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'pullquote pullquote-right'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'cleared subject-terms-container'}))