calibre/recipes/nationalgeographic.recipe

40 lines
1.2 KiB
Plaintext

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
nationalgeographic.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class NationalGeographicNews(BasicNewsRecipe):
title = u'National Geographic News'
oldest_article = 7
language = 'en'
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
feeds = [(u'news', u'http://feeds.nationalgeographic.com/ng/News/News_Main')]
remove_tags_before = dict(id='page_head')
remove_tags_after = [dict(id='social_buttons'),{'class':'aside'}]
remove_tags = [
{'class':'hidden'}
]
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'ads\.pheedo\.com', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds