From c0dc8bc5e0f39bab40d2501aee79e3b19e0322b9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Sep 2011 08:20:00 -0600 Subject: [PATCH] Fix American Prospect --- recipes/aprospect.recipe | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/recipes/aprospect.recipe b/recipes/aprospect.recipe index ce230c624a..1e07ce5f52 100644 --- a/recipes/aprospect.recipe +++ b/recipes/aprospect.recipe @@ -1,26 +1,18 @@ -import re from calibre.web.feeds.news import BasicNewsRecipe class AmericanProspect(BasicNewsRecipe): title = u'American Prospect' - __author__ = u'Michael Heinz' - oldest_article = 30 - language = 'en' - max_articles_per_feed = 100 - recursions = 0 - no_stylesheets = True - remove_javascript = True + __author__ = u'Michael Heinz, a.peter' + version = 2 - preprocess_regexps = [ - (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: '
'), - (re.compile(r'
.*', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile('\r'),lambda match: ''), - (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''), - ] + oldest_article = 30 + language = 'en' + max_articles_per_feed = 100 + recursions = 0 + no_stylesheets = True + remove_javascript = True + + keep_only_tags = [dict(name='div', attrs={'class':'pad_10L10R'})] + remove_tags = [dict(name='form'), dict(name='div', attrs={'class':['bkt_caption','sharebox noprint','badgebox']})] feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')] -