From f5f35cd1edeaa2cbe2018c79f32dee7dc49e014a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 2 Jun 2011 11:41:59 -0600 Subject: [PATCH] Fix #791481 (CNN News fails to download as of 5/31 (previous version)) --- recipes/cnn.recipe | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/recipes/cnn.recipe b/recipes/cnn.recipe index a2b6665033..ccf47e26d8 100644 --- a/recipes/cnn.recipe +++ b/recipes/cnn.recipe @@ -3,6 +3,8 @@ __copyright__ = '2008, Kovid Goyal ' ''' Profile to download CNN ''' + +import re from calibre.web.feeds.news import BasicNewsRecipe class CNN(BasicNewsRecipe): @@ -20,12 +22,25 @@ class CNN(BasicNewsRecipe): #match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html'] max_articles_per_feed = 25 + preprocess_regexps = [ + (re.compile(r'', re.DOTALL), lambda m: ''), + (re.compile(r'', re.DOTALL), lambda m: ''), + (re.compile(r'', re.DOTALL), lambda m: ''), + ] + + keep_only_tags = [dict(id='cnnContentContainer')] + remove_tags = [ + {'class':['cnn_strybtntools', 'cnn_strylftcntnt', + 'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt', + 'cnn_strycntntrgt']}, + ] + feeds = [ ('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'), ('World', 'http://rss.cnn.com/rss/cnn_world.rss'), ('U.S.', 'http://rss.cnn.com/rss/cnn_us.rss'), - ('Sports', 'http://rss.cnn.com/rss/si_topstories.rss'), + #('Sports', 'http://rss.cnn.com/rss/si_topstories.rss'), ('Business', 'http://rss.cnn.com/rss/money_latest.rss'), ('Politics', 'http://rss.cnn.com/rss/cnn_allpolitics.rss'), ('Law', 'http://rss.cnn.com/rss/cnn_law.rss'),