## ## web2lrf profile to download articles from Barrons.com ## can download subscriber-only content if username and ## password are supplied. ## ''' ''' import re from calibre.web.feeds.news import BasicNewsRecipe class Barrons(BasicNewsRecipe): title = 'Barron\'s' max_articles_per_feed = 50 needs_subscription = True language = 'en' __author__ = 'Kovid Goyal and Sujata Raman' description = 'Weekly publication for investors from the publisher of the Wall Street Journal' timefmt = ' [%a, %b %d, %Y]' use_embedded_content = False no_stylesheets = True match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*'] conversion_options = {'linearize_tables': True} ##delay = 1 ## Don't grab articles more than 7 days old oldest_article = 7 extra_css = ''' .datestamp{font-family:Verdana,Geneva,Kalimati,sans-serif; font-size:x-small;} h3{font-family:Georgia,"Times New Roman",Times,serif; } h2{font-family:Georgia,"Times New Roman",Times,serif; } h1{ font-family:Georgia,"Times New Roman",Times,serif; } .byline{font-family:Verdana,Geneva,Kalimati,sans-serif; font-size:x-small;} .subhead{font-family:Georgia,"Times New Roman",Times,serif; font-size: small;} .articlePage{ font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif;} .insettipUnit{font-size: x-small;} ''' remove_tags = [ dict(name ='div', attrs={'class':['tabContainer artTabbedNav','rssToolBox hidden','articleToolbox']}), dict(name = 'a', attrs ={'class':'insetClose'}) ] preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [ ## Remove anything before the body of the article. (r'