From d77746ea079bd9154e675102ff5d1f09bd7434fc Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 29 May 2008 13:00:29 -0700
Subject: [PATCH] Clean up BusinessWeek recipe

---
Review note: get_article_url() checks that the guid is present before the
'podcasts' substring test; a feed entry without a guid would otherwise raise
TypeError ('in' applied to None). Such entries still yield a URL of None.

 src/calibre/web/feeds/__init__.py              |  2 ++
 src/calibre/web/feeds/recipes/business_week.py | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py
index 06e27e0008..0d869e36d2 100644
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -124,6 +124,8 @@ class Feed(object):
         content = '\n'.join(i.value for i in item.get('content', []))
         if not content.strip():
             content = None
+        if not link and not content:
+            return
         article = Article(id, title, link, description, published, content)
         delta = datetime.utcnow() - article.utctime
 
diff --git a/src/calibre/web/feeds/recipes/business_week.py b/src/calibre/web/feeds/recipes/business_week.py
index 94e56c27fa..794bdf9582 100644
--- a/src/calibre/web/feeds/recipes/business_week.py
+++ b/src/calibre/web/feeds/recipes/business_week.py
@@ -14,13 +14,14 @@ class BusinessWeek(BasicNewsRecipe):
     description = 'Business News, Stock Market and Financial Advice'
     __author__ = 'ChuckEggDotCom'
     oldest_article = 7
-    max_articles_per_feed = 100
+    max_articles_per_feed = 10
     remove_tags_before = dict(name='h1')
     remove_tags_after = dict(id='footer')
     remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool']}),
                    dict(id=['footer', 'navigation', 'archive', 'side_search',
                             'blog_sidebar', 'side_tool', 'side_index']),
-                   dict(name=['script', 'noscript'])]
+                   dict(name='h2', attrs={'class':'listspace'}),
+                   ]
 
     feeds = [
         (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
@@ -45,5 +46,11 @@ class BusinessWeek(BasicNewsRecipe):
         (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
         ]
 
+    def get_article_url(self, article):
+        url = article.get('guid', None)
+        if url and 'podcasts' in url:  # guid may be absent; 'in' on None raises TypeError
+            url = None
+        return url
+
     def print_version(self, url):
         return url.replace('http://www.businessweek.com/', 'http://www.businessweek.com/print/')