# -*- coding: utf-8 -*-

from calibre.web.feeds.recipes import BasicNewsRecipe


class NYTimes(BasicNewsRecipe):
    """Calibre recipe that downloads the New England Journal of Medicine.

    The recipe logs in to www.nejm.org with the user's subscription
    credentials, fetches the table of contents of the current issue and
    turns every article grouping of that page into one feed.

    NOTE(review): the class name does not match the publication named in
    ``title``; it is kept unchanged so existing references keep working.
    """

    title = 'New England Journal of Medicine'
    __author__ = 'Kovid Goyal'
    description = 'Medical news'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = True
    language = 'en'
    no_stylesheets = True
    # calibre documents keep_only_tags as a *list* of tag specifications.
    keep_only_tags = [dict(id='content')]

    def get_browser(self):
        """Return a browser that is logged in to www.nejm.org.

        Submits ``self.username`` / ``self.password`` through the site's
        ``frmLogin`` form.

        Raises:
            Exception: if the page returned after submitting the form does
                not contain the "Sign Out" link, i.e. the login failed.
        """
        # get_browser is an instance method on BasicNewsRecipe, so the
        # recipe instance has to be passed explicitly.
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.nejm.org/action/showLogin?uri=http://www.nejm.org/')
        br.select_form(name='frmLogin')
        br['login'] = self.username
        br['password'] = self.password
        response = br.submit()
        raw = response.read()
        # The response body is bytes on Python 3; decode it so the
        # substring test below compares text with text.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        if '>Sign Out<' not in raw:
            raise Exception('Login failed. Check your username and password')
        return br

    def nejm_get_index(self):
        """Return the parsed table-of-contents page of the current issue."""
        return self.index_to_soup('http://content.nejm.org/current.dtl')

    def _parse_article(self, art):
        """Build the article dict for one ``articleEntry`` element.

        Returns a dict with ``title``, ``url`` and ``date`` keys (plus
        ``author`` and ``description`` when the entry provides them), or
        ``None`` when the entry carries no usable link.
        """
        link = art.find(attrs={'class': lambda x: x and 'articleLink' in x})
        if link is None:
            return None
        a = link.find('a', href=True)
        if a is None:
            return None

        url = a.get('href')
        if url.startswith('/'):
            # Site-relative link: make it absolute.
            url = 'http://www.nejm.org' + url
        title = self.tag_to_string(a)
        self.log.info('\tFound article:', title, 'at', url)

        article = {'title': title, 'url': url, 'date': ''}
        au = art.find(attrs={'class': 'articleAuthors'})
        if au is not None:
            article['author'] = self.tag_to_string(au)
        desc = art.find(attrs={'class': 'hover_text'})
        if desc is not None:
            desc = self.tag_to_string(desc)
            if 'author' in article:
                desc = ' by ' + article['author'] + ' ' + desc
            article['description'] = desc
        return article

    def parse_index(self):
        """Parse the article table of contents into calibre feeds.

        Returns:
            A list of ``(feed_title, articles)`` tuples, one per
            ``articleGrouping`` element that has an ``articleType`` heading
            and at least one article.

        Raises:
            Exception: if the page has no ``tocContent`` container, which
                means the expected page layout was not found.
        """
        soup = self.nejm_get_index()
        toc = soup.find(attrs={'class': 'tocContent'})
        if toc is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise Exception(
                'Could not find the table of contents (class "tocContent") '
                'on the index page')

        feeds = []
        for group in toc.findAll(attrs={'class': 'articleGrouping'}):
            feed_title = group.find(attrs={'class': 'articleType'})
            if feed_title is None:
                continue
            feed_title = self.tag_to_string(feed_title)
            self.log('Found section:', feed_title)

            articles = []
            for art in group.findAll(
                    attrs={'class': lambda x: x and 'articleEntry' in x}):
                article = self._parse_article(art)
                if article is not None:
                    articles.append(article)

            # Sections without any usable article are dropped.
            if articles:
                feeds.append((feed_title, articles))
        return feeds