From 9c9ed534e38c08f9e88a3bbe942e44a014f38981 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 4 Feb 2012 10:27:26 +0530
Subject: [PATCH] Improved Read It Later

---
 recipes/readitlater.recipe | 65 ++++++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 20 deletions(-)

diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index ea9c92868b..38f7ec1a9a 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -1,30 +1,36 @@
+"""
+readitlaterlist.com
+"""
 __license__   = 'GPL v3'
 __copyright__ = '''
 2010, Darko Miletic
 2011, Przemyslaw Kryger
-'''
-'''
-readitlaterlist.com
+2012, tBunnyMan
 '''
 
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 
+
 class Readitlater(BasicNewsRecipe):
-    title                 = 'Read It Later'
-    __author__            = 'Darko Miletic, Przemyslaw Kryger'
-    description           = '''Personalized news feeds. Go to readitlaterlist.com to
-                               setup up your news. Fill in your account
-                               username, and optionally you can add password.'''
-    publisher             = 'readitlater.com'
+    title                 = 'ReadItLater'
+    __author__            = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
+    description           = '''Personalized news feeds. Go to readitlaterlist.com to setup \
+                            up your news. This version displays pages of articles from \
+                            oldest to newest, with max & minimum counts, and marks articles \
+                            read after downloading.'''
+    publisher             = 'readitlaterlist.com'
     category              = 'news, custom'
     oldest_article        = 7
-    max_articles_per_feed = 100
+    max_articles_per_feed = 50
+    minimum_articles      = 1
     no_stylesheets        = True
     use_embedded_content  = False
     needs_subscription    = True
     INDEX                 = u'http://readitlaterlist.com'
     LOGIN                 = INDEX + u'/l'
+    readList              = []
+
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
@@ -33,41 +39,46 @@ class Readitlater(BasicNewsRecipe):
             br.select_form(nr=0)
             br['feed_id'] = self.username
             if self.password is not None:
-               br['password'] = self.password
+                br['password'] = self.password
             br.submit()
         return br
 
     def get_feeds(self):
-        self.report_progress(0, ('Fetching list of feeds...'))
+        self.report_progress(0, ('Fetching list of pages...'))
         lfeeds = []
         i = 1
         feedurl = self.INDEX + u'/unread/1'
         while True:
             title = u'Unread articles, page ' + str(i)
-            lfeeds.append((title, feedurl))
-            self.report_progress(0, ('Got ') + str(i) + (' feeds'))
+            lfeeds.insert(0, (title, feedurl))
+            self.report_progress(0, ('Got ') + str(i) + (' pages'))
             i += 1
             soup = self.index_to_soup(feedurl)
-            ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
+            ritem = soup.find('a', attrs={'id':'next', 'class':'active'})
             if ritem is None:
                 break
             feedurl = self.INDEX + ritem['href']
-        if self.test:
-            return lfeeds[:2]
         return lfeeds
 
     def parse_index(self):
         totalfeeds = []
+        articlesToGrab = self.max_articles_per_feed
        lfeeds = self.get_feeds()
         for feedobj in lfeeds:
+            if articlesToGrab < 1:
+                break
             feedtitle, feedurl = feedobj
             self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
             articles = []
             soup = self.index_to_soup(feedurl)
-            ritem = soup.find('ul',attrs={'id':'list'})
-            for item in ritem.findAll('li'):
+            ritem = soup.find('ul', attrs={'id':'list'})
+            for item in reversed(ritem.findAll('li')):
+                if articlesToGrab < 1:
+                    break
+                else:
+                    articlesToGrab -= 1
                 description = ''
-                atag = item.find('a',attrs={'class':'text'})
+                atag = item.find('a', attrs={'class':'text'})
                 if atag and atag.has_key('href'):
                     url   = self.INDEX + atag['href']
                     title = self.tag_to_string(item.div)
@@ -78,6 +89,20 @@ class Readitlater(BasicNewsRecipe):
                                      ,'url'        :url
                                      ,'description':description
                                     })
+                    readLink = item.find('a', attrs={'class':'check'})['href']
+                    self.readList.append(readLink)
             totalfeeds.append((feedtitle, articles))
+        if len(self.readList) < self.minimum_articles:
+            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
         return totalfeeds
 
+    def mark_as_read(self, markList):
+        br = self.get_browser()
+        for link in markList:
+            url = self.INDEX + link
+            response = br.open(url)
+            response
+
+    def cleanup(self):
+        self.mark_as_read(self.readList)
+
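
For reference, the functional core of this patch is the shared article cap in parse_index(): articlesToGrab starts at max_articles_per_feed, pages and the items on each page are walked oldest-to-newest, and collection stops once the cap is exhausted. Below is a minimal standalone sketch of that counting logic only; the function name and sample data are invented for illustration and are not part of the recipe, which builds its lists by scraping readitlaterlist.com via calibre's BasicNewsRecipe machinery.

# Standalone sketch of the article-capping logic added to parse_index().
# grab_oldest_first() and the sample data are hypothetical; the real recipe
# scrapes its page/item lists from readitlaterlist.com.

def grab_oldest_first(pages, max_articles):
    """Walk pages oldest-to-newest and collect at most max_articles items."""
    remaining = max_articles          # mirrors articlesToGrab in the recipe
    grabbed = []
    for page_title, items in pages:   # pages are ordered oldest page first
        if remaining < 1:
            break
        # Each page lists its items newest-first, so reverse them to pick
        # up the oldest articles before the cap runs out.
        for item in reversed(items):
            if remaining < 1:
                break
            remaining -= 1
            grabbed.append((page_title, item))
    return grabbed

if __name__ == '__main__':
    pages = [
        ('Unread articles, page 2', ['article-3', 'article-2', 'article-1']),
        ('Unread articles, page 1', ['article-7', 'article-6', 'article-5', 'article-4']),
    ]
    for page, article in grab_oldest_first(pages, max_articles=5):
        print(page, article)          # prints article-1 .. article-5, oldest first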