calibre/recipes/readitlater.recipe
2011-09-29 15:11:02 -06:00

84 lines
3.1 KiB
Plaintext

__license__ = 'GPL v3'
__copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com>
'''
'''
readitlaterlist.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Readitlater(BasicNewsRecipe):
title = 'Read It Later'
__author__ = 'Darko Miletic, Przemyslaw Kryger'
description = '''Personalized news feeds. Go to readitlaterlist.com to
setup up your news. Fill in your account
username, and optionally you can add password.'''
publisher = 'readitlater.com'
category = 'news, custom'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
INDEX = u'http://readitlaterlist.com'
LOGIN = INDEX + u'/l'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None:
br.open(self.LOGIN)
br.select_form(nr=0)
br['feed_id'] = self.username
if self.password is not None:
br['password'] = self.password
br.submit()
return br
def get_feeds(self):
self.report_progress(0, ('Fetching list of feeds...'))
lfeeds = []
i = 1
feedurl = self.INDEX + u'/unread/1'
while True:
title = u'Unread articles, page ' + str(i)
lfeeds.append((title, feedurl))
self.report_progress(0, ('Got ') + str(i) + (' feeds'))
i += 1
soup = self.index_to_soup(feedurl)
ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
if ritem is None:
break
feedurl = self.INDEX + ritem['href']
if self.test:
return lfeeds[:2]
return lfeeds
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
ritem = soup.find('ul',attrs={'id':'list'})
for item in ritem.findAll('li'):
description = ''
atag = item.find('a',attrs={'class':'text'})
if atag and atag.has_key('href'):
url = self.INDEX + atag['href']
title = self.tag_to_string(item.div)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds