diff --git a/recipes/st_louis_post_dispatch.recipe b/recipes/st_louis_post_dispatch.recipe index 3b7701cedc..6d22a327ab 100644 --- a/recipes/st_louis_post_dispatch.recipe +++ b/recipes/st_louis_post_dispatch.recipe @@ -7,12 +7,16 @@ class AdvancedUserRecipe1282093204(BasicNewsRecipe): oldest_article = 1 max_articles_per_feed = 15 + use_embedded_content = False + + no_stylesheets = True + auto_cleanup = True masthead_url = 'http://farm5.static.flickr.com/4118/4929686950_0e22e2c88a.jpg' feeds = [ (u'News-Bill McClellan', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fbill-mclellan&f=rss&t=article'), (u'News-Columns', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcolumns*&l=50&f=rss&t=article'), - (u'News-Crime & Courtshttp://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcrime-and-courts&l=50&f=rss&t=article'), + (u'News-Crime & Courts', 'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcrime-and-courts&l=50&f=rss&t=article'), (u'News-Deb Peterson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fdeb-peterson&f=rss&t=article'), (u'News-Education', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2feducation&f=rss&t=article'), (u'News-Government & Politics', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fgovt-and-politics&f=rss&t=article'), @@ -62,9 +66,9 @@ class AdvancedUserRecipe1282093204(BasicNewsRecipe): (u'Entertainment-House-O-Fun', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fhouse-o-fun&l=100&f=rss&t=article'), (u'Entertainment-Kevin C. Johnson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fmusic%2Fkevin-johnson&l=100&f=rss&t=article') ] - remove_empty_feeds = True - remove_tags = [dict(name='div', attrs={'id':'blox-logo'}),dict(name='a')] - keep_only_tags = [dict(name='h1'), dict(name='p', attrs={'class':'byline'}), dict(name="div", attrs={'id':'blox-story-text'})] + #remove_empty_feeds = True + #remove_tags = [dict(name='div', attrs={'id':'blox-logo'}),dict(name='a')] + #keep_only_tags = [dict(name='h1'), dict(name='p', attrs={'class':'byline'}), dict(name="div", attrs={'id':'blox-story-text'})] extra_css = 'p {text-align: left;}' diff --git a/recipes/tsn.recipe b/recipes/tsn.recipe index e822ebc633..6c3dbe5159 100644 --- a/recipes/tsn.recipe +++ b/recipes/tsn.recipe @@ -7,28 +7,15 @@ class AdvancedUserRecipe1289990851(BasicNewsRecipe): language = 'en_CA' __author__ = 'Nexus' no_stylesheets = True + auto_cleanup = True + use_embedded_content = False INDEX = 'http://tsn.ca/nhl/story/?id=nhl' - keep_only_tags = [dict(name='div', attrs={'id':['tsnColWrap']}), - dict(name='div', attrs={'id':['tsnStory']})] - remove_tags = [dict(name='div', attrs={'id':'tsnRelated'}), - dict(name='div', attrs={'class':'textSize'})] - - def parse_index(self): - feeds = [] - soup = self.index_to_soup(self.INDEX) - feed_parts = soup.findAll('div', attrs={'class': 'feature'}) - for feed_part in feed_parts: - articles = [] - if not feed_part.h2: - continue - feed_title = feed_part.h2.string - article_parts = feed_part.findAll('a') - for article_part in article_parts: - article_title = article_part.string - article_date = '' - article_url = 'http://tsn.ca/' + article_part['href'] - articles.append({'title': article_title, 'url': article_url, 'description':'', 'date':article_date}) - if articles: - feeds.append((feed_title, articles)) - return feeds + #keep_only_tags = [dict(name='div', attrs={'id':['tsnColWrap']}), + #dict(name='div', attrs={'id':['tsnStory']})] + #remove_tags = [dict(name='div', attrs={'id':'tsnRelated'}), + #dict(name='div', attrs={'class':'textSize'})] + feeds = [ +('News', + 'http://www.tsn.ca/datafiles/rss/Stories.xml'), +]