from calibre.web.feeds.news import BasicNewsRecipe


class Barrons(BasicNewsRecipe):
    """Recipe that downloads Barron's from its RSS feeds.

    A subscription is required (``needs_subscription``); the login is
    performed through ``javascript_login`` because
    ``use_javascript_to_login`` is set.
    """

    title = 'Barron\'s'
    max_articles_per_feed = 50
    needs_subscription = True
    language = 'en'

    __author__ = 'Kovid Goyal'
    description = 'Weekly publication for investors from the publisher of the Wall Street Journal'
    timefmt = ' [%a, %b %d, %Y]'
    use_embedded_content = False
    no_stylesheets = True
    # Raw string: the pattern contains the regex escape ``\?``, which is not
    # a valid escape sequence in a normal string literal.
    match_regexps = [r'http://online.barrons.com/.*?html\?mod=.*?|file:.*']
    conversion_options = {'linearize_tables': True}
    ##delay = 1

    # Don't grab articles more than 7 days old
    oldest_article = 7
    use_javascript_to_login = True
    requires_version = (0, 9, 16)

    # Keep only elements whose class attribute starts with one of the two
    # "sector ... column" prefixes.
    keep_only_tags = [
        dict(attrs={'class': lambda x: x and (
            x.startswith('sector one column') or
            x.startswith('sector two column'))}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': [
            'sTools sTools-t',
            'tabContainer artTabbedNav',
            'rssToolBox hidden',
            'articleToolbox',
        ]}),
        dict(attrs={'class': ['insetButton', 'insettipBox', 'insetClose']}),
        dict(attrs={'data-module-name': [
            'resp.module.trendingNow.BarronsDesktop',
            'resp.module.share_tools.ShareTools',
        ]}),
        dict(name='span', attrs={'data-country-code': True,
                                 'data-ticker-code': True}),
    ]

    def javascript_login(self, br, username, password):
        """Log in by submitting the first form on the Barron's login page.

        :param br: browser object; must provide ``visit``, ``select_form``
            and ``submit``.
        :param username: value written to the form's ``username`` field.
        :param password: value written to the form's ``password`` field.
        """
        br.visit('http://commerce.barrons.com/auth/login')
        f = br.select_form(nr=0)
        f['username'] = username
        f['password'] = password
        # presumably a timeout in seconds -- confirm against the browser API
        br.submit(timeout=120)

    # Use the print version of a page when available.
    def print_version(self, url):
        """Return the print-view URL for the article at *url*.

        Everything from the last ``?`` onwards is dropped and the
        ``#text.print`` fragment is appended.  A URL without any ``?`` is
        used unchanged as the base; previously such a URL was discarded and
        only the bare fragment was returned.
        """
        main, sep, _rest = url.rpartition('?')
        if not sep:
            # rpartition puts the whole string in the last slot when the
            # separator is absent, leaving ``main`` empty.
            main = url
        return main + '#text.print'

    def preprocess_html(self, soup):
        """Strip thumbnails from zoomable image boxes and return *soup*."""
        # Remove thumbnail for zoomable images
        for div in soup.findAll(
                'div',
                attrs={'class': lambda x: x and 'insetZoomTargetBox' in x.split()}):
            img = div.find('img')
            if img is not None:
                img.extract()

        return soup

    # Comment out the feeds you don't want retrieved.
    # Because these feeds are sorted alphabetically when converted to LRF,
    # you may want to number them to put them in the order you desire

    def get_feeds(self):
        """Return the list of ``(title, feed URL)`` pairs to download."""
        return [
            ('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'),
            ('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'),
            ('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'),
            ('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'),
            ('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'),
            ('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
        ]

    def get_article_url(self, article):
        """Return the ``link`` entry of *article*, or ``None`` if absent."""
        return article.get('link', None)

    def get_cover_url(self):
        """Return the cover image URL found on the home page, or ``None``.

        Looks up the ``<ul class="newsItem barronsMag">`` element on the
        home page and returns the ``src`` of its first ``<img>``.  ``None``
        is returned when the list, the image or its ``src`` attribute is
        missing, instead of raising on unexpected markup.
        """
        index = 'http://online.barrons.com/home-page'
        soup = self.index_to_soup(index)
        link_item = soup.find('ul', attrs={'class': 'newsItem barronsMag'})
        if link_item is None:
            return None
        img = link_item.find('img')
        if img is None:
            return None
        return img.get('src', None)