From 91217add8b2b3c312a81c45b13459309bc0d170b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Mar 2008 02:20:12 +0000 Subject: [PATCH] Fix Wall Street Journal recipe --- src/libprs500/web/feeds/recipes/__init__.py | 2 +- src/libprs500/web/feeds/recipes/wsj.py | 93 +++++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 src/libprs500/web/feeds/recipes/wsj.py diff --git a/src/libprs500/web/feeds/recipes/__init__.py b/src/libprs500/web/feeds/recipes/__init__.py index bb92e56a51..a8c3c517b6 100644 --- a/src/libprs500/web/feeds/recipes/__init__.py +++ b/src/libprs500/web/feeds/recipes/__init__.py @@ -6,7 +6,7 @@ Builtin recipes. ''' recipes = [ 'newsweek', 'atlantic', 'economist', 'dilbert', 'portfolio', - 'nytimes', 'usatoday', 'outlook_india', 'bbc', 'greader', + 'nytimes', 'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj', ] import re, imp, inspect, time diff --git a/src/libprs500/web/feeds/recipes/wsj.py b/src/libprs500/web/feeds/recipes/wsj.py new file mode 100644 index 0000000000..f9f9553c43 --- /dev/null +++ b/src/libprs500/web/feeds/recipes/wsj.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +from libprs500.web.feeds.news import BasicNewsRecipe +import re, urlparse + +class WallStreetJournal(BasicNewsRecipe): + + title = 'The Wall Street Journal' + needs_subscription = True + max_articles_per_feed = 10 + timefmt = ' [%a, %b %d, %Y]' + html2lrf_options = ['--ignore-tables'] + + preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + [ + ## Remove anything before the body of the article. + (r'