Improved Read It Later

2025-11-26 16:25:02 -05:00 · 2012-02-04 10:27:26 +05:30 · 2012-02-04 10:27:26 +05:30 · 9c9ed534e3
commit 9c9ed534e3
parent b65b295dd9
1 changed file with 45 additions and 20 deletions
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@ -1,30 +1,36 @@
+"""
+readitlaterlist.com
+"""
 __license__   = 'GPL v3'
 __copyright__ = '''
 2010, Darko Miletic <darko.miletic at gmail.com>
 2011, Przemyslaw Kryger <pkryger at gmail.com>
-'''
-'''
-readitlaterlist.com
+2012, tBunnyMan <Wag That Tail At Me dot com>
 '''

 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe

+
 class Readitlater(BasicNewsRecipe):
-    title                 = 'Read It Later'
-    __author__            = 'Darko Miletic, Przemyslaw Kryger'
-    description           = '''Personalized news feeds. Go to readitlaterlist.com to
-                               setup up your news. Fill in your account
-                               username, and optionally you can add password.'''
-    publisher             = 'readitlater.com'
+    title                 = 'ReadItLater'
+    __author__            = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
+    description           = ('Personalized news feeds. Go to readitlaterlist.com to set '
+                             'up your news. This version displays pages of articles from '
+                             'oldest to newest, with max & minimum counts, and marks articles '
+                             'read after downloading.')  # adjacent literals keep source indentation out of the text
+    publisher             = 'readitlaterlist.com'
    category              = 'news, custom'
    oldest_article        = 7
-    max_articles_per_feed = 100
+    max_articles_per_feed = 50
+    minimum_articles      = 1
    no_stylesheets        = True
    use_embedded_content  = False
    needs_subscription    = True
    INDEX                 = u'http://readitlaterlist.com'
    LOGIN                 = INDEX + u'/l'
+    readList              = []
+

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -33,41 +39,46 @@ class Readitlater(BasicNewsRecipe):
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
-               br['password'] = self.password
+                br['password'] = self.password
            br.submit()
        return br

     def get_feeds(self):
-        self.report_progress(0, ('Fetching list of feeds...'))
+        self.report_progress(0, ('Fetching list of pages...'))
         lfeeds = []
         i = 1
         feedurl = self.INDEX + u'/unread/1'
         while True:
             title = u'Unread articles, page ' + str(i)
-            lfeeds.append((title, feedurl))
-            self.report_progress(0, ('Got ') + str(i) + (' feeds'))
+            lfeeds.insert(0, (title, feedurl))  # prepend, so the list ends up in reverse fetch order
+            self.report_progress(0, ('Got ') + str(i) + (' pages'))
             i += 1
             soup = self.index_to_soup(feedurl)
-            ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
+            ritem = soup.find('a', attrs={'id':'next', 'class':'active'})  # active "next" link; None ends the loop
             if ritem is None:
                 break
             feedurl = self.INDEX + ritem['href']
-        if self.test:
-            return lfeeds[:2]
         return lfeeds

    def parse_index(self):
        totalfeeds = []
+        articlesToGrab = self.max_articles_per_feed
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
+            if articlesToGrab < 1:
+                break
            feedtitle, feedurl = feedobj
            self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
-            ritem = soup.find('ul',attrs={'id':'list'})
-            for item in ritem.findAll('li'):
+            ritem = soup.find('ul', attrs={'id':'list'})
+            for item in reversed(ritem.findAll('li')):
+                if articlesToGrab < 1:
+                    break
+                else:
+                    articlesToGrab -= 1
                description = ''
-                atag = item.find('a',attrs={'class':'text'})
+                atag = item.find('a', attrs={'class':'text'})
                if atag and atag.has_key('href'):
                    url         = self.INDEX + atag['href']
                    title       = self.tag_to_string(item.div)
@ -78,6 +89,20 @@ class Readitlater(BasicNewsRecipe):
                                     ,'url'        :url
                                     ,'description':description
                                    })
+                    readLink = item.find('a', attrs={'class':'check'})['href']
+                    self.readList.append(readLink)
            totalfeeds.append((feedtitle, articles))
+        if len(self.readList) < self.minimum_articles:
+            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
        return totalfeeds

+    def mark_as_read(self, markList):
+        """Open INDEX + link for each entry of markList with the browser from get_browser()."""
+        br = self.get_browser()
+        for link in markList:
+            # Only the request itself matters here; the response is not used.
+            br.open(self.INDEX + link)
+
+    def cleanup(self):
+        """Pass the links collected in self.readList to mark_as_read()."""
+        self.mark_as_read(self.readList)