mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
79 lines
2.7 KiB
Plaintext
79 lines
2.7 KiB
Plaintext
# -*- coding: utf-8 -*-
|
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
|
|
class NYTimes(BasicNewsRecipe):
    """Recipe that downloads the current issue of the New England Journal of Medicine.

    The class is named ``NYTimes`` although it fetches nejm.org content; the
    name is kept unchanged because it is this recipe's public identifier.
    A subscription login (``self.username`` / ``self.password``) is required.
    """

    title = 'New England Journal of Medicine'
    __author__ = 'Kovid Goyal'
    description = 'Medical news'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = True
    language = 'en'

    no_stylesheets = True
    # Only the element with id="content" is kept from each article page.
    keep_only_tags = dict(id='content')

    # Login
    def get_browser(self):
        """Return a browser that has logged in to nejm.org.

        Raises:
            Exception: if the page returned after submitting the login form
                does not contain a "Sign Out" link.
        """
        # Called through the base class, so the instance has to be passed
        # explicitly; calling it with no arguments fails when get_browser is
        # an instance method.
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.nejm.org/action/showLogin?uri=http://www.nejm.org/')
        br.select_form(name='frmLogin')
        br['login'] = self.username
        br['password'] = self.password
        response = br.submit()
        raw = response.read()
        # The response body may be bytes; decode it so the substring test
        # below compares text with text instead of raising TypeError.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        if '>Sign Out<' not in raw:
            raise Exception('Login failed. Check your username and password')
        return br

    # Fetch the table of contents of the current issue
    def nejm_get_index(self):
        """Return the parsed soup of the current-issue table of contents page."""
        return self.index_to_soup('http://content.nejm.org/current.dtl')

    @staticmethod
    def _has_class(name):
        """Return a matcher accepting a class attribute value that contains *name*."""
        return lambda value: value and name in value

    def _parse_article(self, art):
        """Build the article dict for one article entry tag.

        Returns:
            A dict with 'title', 'url' and 'date' keys, plus 'author' and
            'description' when the entry provides them, or None when the
            entry has no usable link.
        """
        link = art.find(attrs={'class': self._has_class('articleLink')})
        if link is None:
            return None
        a = link.find('a', href=True)
        if a is None:
            return None

        url = a.get('href')
        if url.startswith('/'):
            # Site-relative link: make it absolute.
            url = 'http://www.nejm.org' + url
        title = self.tag_to_string(a)
        self.log.info('\tFound article:', title, 'at', url)
        article = {'title': title, 'url': url, 'date': ''}

        au = art.find(attrs={'class': 'articleAuthors'})
        if au is not None:
            article['author'] = self.tag_to_string(au)

        desc = art.find(attrs={'class': 'hover_text'})
        if desc is not None:
            desc = self.tag_to_string(desc)
            if 'author' in article:
                desc = ' by ' + article['author'] + ' ' + desc
            article['description'] = desc
        return article

    # Parse the table of contents into feeds
    def parse_index(self):
        """Parse the table of contents into a list of feeds.

        Returns:
            A list of ``(feed_title, articles)`` tuples, one per article
            grouping that has a title and at least one article.

        Raises:
            ValueError: if the page has no element with class 'tocContent'.
        """
        parse_soup = self.nejm_get_index()

        div = parse_soup.find(attrs={'class': 'tocContent'})
        if div is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                'Could not find the table of contents (class "tocContent") '
                'on the NEJM index page')

        feeds = []
        for group in div.findAll(attrs={'class': 'articleGrouping'}):
            feed_title = group.find(attrs={'class': 'articleType'})
            if feed_title is None:
                continue
            feed_title = self.tag_to_string(feed_title)
            self.log('Found section:', feed_title)

            articles = []
            entries = group.findAll(
                attrs={'class': self._has_class('articleEntry')})
            for art in entries:
                article = self._parse_article(art)
                if article is not None:
                    articles.append(article)

            # Sections without any usable article are dropped.
            if articles:
                feeds.append((feed_title, articles))

        return feeds
|
|
|
|
|