diff --git a/src/libprs500/ebooks/lrf/web/convert_from.py b/src/libprs500/ebooks/lrf/web/convert_from.py index 0c4c79088b..046c17b8ab 100644 --- a/src/libprs500/ebooks/lrf/web/convert_from.py +++ b/src/libprs500/ebooks/lrf/web/convert_from.py @@ -32,9 +32,13 @@ from libprs500.ebooks.lrf.web.profiles.newyorkreview import NewYorkReviewOfBooks from libprs500.ebooks.lrf.web.profiles.spiegelde import SpiegelOnline from libprs500.ebooks.lrf.web.profiles.zeitde import ZeitNachrichten from libprs500.ebooks.lrf.web.profiles.faznet import FazNet +from libprs500.ebooks.lrf.web.profiles.wsj import WallStreetJournal +from libprs500.ebooks.lrf.web.profiles.barrons import Barrons +from libprs500.ebooks.lrf.web.profiles.portfolio import Portfolio -builtin_profiles = [NYTimes, BBC, Newsweek, Economist, NewYorkReviewOfBooks, \ - SpiegelOnline, ZeitNachrichten, FazNet] +builtin_profiles = [NYTimes, BBC, Newsweek, Economist, NewYorkReviewOfBooks, \ + SpiegelOnline, ZeitNachrichten, FazNet, WallStreetJournal, \ + Barrons, Portfolio] available_profiles = [i.__module__.rpartition('.')[2] for i in builtin_profiles] def option_parser(): diff --git a/src/libprs500/ebooks/lrf/web/profiles/barrons.py b/src/libprs500/ebooks/lrf/web/profiles/barrons.py new file mode 100644 index 0000000000..2791b5c008 --- /dev/null +++ b/src/libprs500/ebooks/lrf/web/profiles/barrons.py @@ -0,0 +1,89 @@ +## +## web2lrf profile to download articles from Barrons.com +## can download subscriber-only content if username and +## password are supplied. +## +''' +''' + +import re + +from libprs500.ebooks.lrf.web.profiles import DefaultProfile + +class Barrons(DefaultProfile): + + title = 'Barron\'s' + max_recursions = 3 + max_articles_per_feed = 50 + timefmt = ' [%a, %b %d, %Y]' + html_description = True + no_stylesheets = False + match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*'] + html2lrf_options = [('--ignore-tables'),('--base-font-size=10')] + ##delay = 1 + + ## Don't grab articles more than 7 days old + oldest_article = 7 + + + preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + [ + ## Remove anything before the body of the article. + (r'