Clean up BusinessWeek recipe

This commit is contained in:
Kovid Goyal 2008-05-29 13:00:29 -07:00
parent 445fd0f6b7
commit d77746ea07
2 changed files with 11 additions and 2 deletions

View File

@ -124,6 +124,8 @@ class Feed(object):
content = '\n'.join(i.value for i in item.get('content', []))
if not content.strip():
content = None
if not link and not content:
return
article = Article(id, title, link, description, published, content)
delta = datetime.utcnow() - article.utctime

View File

@ -14,13 +14,14 @@ class BusinessWeek(BasicNewsRecipe):
description = 'Business News, Stock Market and Financial Advice'
__author__ = 'ChuckEggDotCom'
oldest_article = 7
max_articles_per_feed = 100
max_articles_per_feed = 10
remove_tags_before = dict(name='h1')
remove_tags_after = dict(id='footer')
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool']}),
dict(id=['footer', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
dict(name=['script', 'noscript'])]
dict(name='h2', attrs={'class':'listspace'}),
]
feeds = [
(u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
@ -45,5 +46,11 @@ class BusinessWeek(BasicNewsRecipe):
(u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
]
def get_article_url(self, article):
    """Return the URL to download for a feed entry, or None to reject it.

    The URL is taken from the entry's 'guid' field. Entries whose guid
    contains 'podcasts' are rejected by returning None.

    article -- mapping-like feed entry supporting .get()
    """
    url = article.get('guid', None)
    # An entry without a guid yields None here; the substring test below
    # would raise TypeError on None, so check for it explicitly.
    if url is not None and 'podcasts' in url:
        return None
    return url
def print_version(self, url):
    """Return *url* with the site root rewritten to its '/print/' path.

    URLs that do not contain the BusinessWeek site root come back
    unchanged.
    """
    site_root = 'http://www.businessweek.com/'
    print_root = 'http://www.businessweek.com/print/'
    printable = url.replace(site_root, print_root)
    return printable