diff --git a/src/libprs500/web/feeds/recipes/__init__.py b/src/libprs500/web/feeds/recipes/__init__.py index bb92e56a51..a8c3c517b6 100644 --- a/src/libprs500/web/feeds/recipes/__init__.py +++ b/src/libprs500/web/feeds/recipes/__init__.py @@ -6,7 +6,7 @@ Builtin recipes. ''' recipes = [ 'newsweek', 'atlantic', 'economist', 'dilbert', 'portfolio', - 'nytimes', 'usatoday', 'outlook_india', 'bbc', 'greader', + 'nytimes', 'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj', ] import re, imp, inspect, time diff --git a/src/libprs500/web/feeds/recipes/wsj.py b/src/libprs500/web/feeds/recipes/wsj.py new file mode 100644 index 0000000000..f9f9553c43 --- /dev/null +++ b/src/libprs500/web/feeds/recipes/wsj.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +from libprs500.web.feeds.news import BasicNewsRecipe +import re, urlparse + +class WallStreetJournal(BasicNewsRecipe): + + title = 'The Wall Street Journal' + needs_subscription = True + max_articles_per_feed = 10 + timefmt = ' [%a, %b %d, %Y]' + html2lrf_options = ['--ignore-tables'] + + preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + [ + ## Remove anything before the body of the article. + (r'