# calibre/recipes/readitlater.recipe

"""
readitlaterlist.com
"""
__license__   = 'GPL v3'
__copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com>
2012, tBunnyMan <Wag That Tail At Me dot com>
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Readitlater(BasicNewsRecipe):
    """Recipe that downloads a user's unread list from readitlaterlist.com.

    The unread list is walked page by page, articles are collected from the
    last page to the first (and bottom-up within each page) until
    ``max_articles_per_feed`` articles have been gathered, and every
    downloaded article's "mark as read" link is visited in ``cleanup``.
    """

    title                 = 'ReadItLater'
    __author__            = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
    description           = (
        'Personalized news feeds. Go to readitlaterlist.com to set up '
        'your news. This version displays pages of articles from '
        'oldest to newest, with max & minimum counts, and marks articles '
        'read after downloading.'
    )
    publisher             = 'readitlaterlist.com'
    category              = 'news, custom'
    oldest_article        = 7
    # Used here as the total article budget across all pages (see parse_index).
    max_articles_per_feed = 50
    # parse_index raises if fewer articles than this were collected.
    minimum_articles      = 1
    no_stylesheets        = True
    use_embedded_content  = False
    needs_subscription    = True
    INDEX                 = u'http://readitlaterlist.com'
    LOGIN                 = INDEX + u'/l'
    # Class-level default only; parse_index rebinds a fresh list on the
    # instance so that state is never shared between recipe instances.
    readList              = []

    def get_browser(self):
        """Return a browser, logged in when a username is configured.

        The first form on the LOGIN page is filled with the username (and the
        password, when one is set) and submitted.
        """
        # Pass self explicitly: get_browser is documented as an instance
        # method in the calibre recipe API, so the bare call raises TypeError.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def get_feeds(self):
        """Return ``(title, url)`` tuples for every page of the unread list.

        Pages are discovered by following the active "next" link, and are
        returned with the last discovered page first.
        """
        self.report_progress(0, ('Fetching list of pages...'))
        pages = []
        page_number = 1
        feedurl = self.INDEX + u'/unread/1'
        while True:
            title = u'Unread articles, page ' + str(page_number)
            pages.append((title, feedurl))
            self.report_progress(0, ('Got ') + str(page_number) + (' pages'))
            page_number += 1
            soup = self.index_to_soup(feedurl)
            next_link = soup.find('a', attrs={'id':'next', 'class':'active'})
            if next_link is None:
                break
            feedurl = self.INDEX + next_link['href']
        # Last page first; one reverse replaces repeated insert(0, ...).
        pages.reverse()
        return pages

    def parse_index(self):
        """Build the list of ``(feed title, articles)`` tuples to download.

        At most ``max_articles_per_feed`` articles are collected in total.
        The "mark as read" link of each collected article is remembered in
        ``self.readList`` for ``cleanup``.

        Raises:
            Exception: if fewer than ``minimum_articles`` articles were found.
        """
        # Rebind on the instance so the class-level list is never mutated.
        self.readList = []
        totalfeeds = []
        collected = 0
        budget = self.max_articles_per_feed
        for feedtitle, feedurl in self.get_feeds():
            if collected >= budget:
                break
            self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            listing = soup.find('ul', attrs={'id':'list'})
            # A page without the article list contributes no articles
            # instead of failing on None.findAll.
            items = listing.findAll('li') if listing is not None else []
            for item in reversed(items):
                if collected >= budget:
                    break
                atag = item.find('a', attrs={'class':'text'})
                href = atag.get('href') if atag is not None else None
                if not href:
                    # Entries without a usable link do not consume the budget.
                    continue
                collected += 1
                articles.append({
                    'title'      : self.tag_to_string(item.div),
                    'date'       : strftime(self.timefmt),
                    'url'        : self.INDEX + href,
                    'description': '',
                })
                check = item.find('a', attrs={'class':'check'})
                read_link = check.get('href') if check is not None else None
                if read_link:
                    self.readList.append(read_link)
            totalfeeds.append((feedtitle, articles))
        if collected < self.minimum_articles:
            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
        return totalfeeds

    def mark_as_read(self, markList):
        """Open every link in ``markList`` (paths relative to ``INDEX``)."""
        if not markList:
            # Nothing to mark: skip creating a browser and logging in.
            return
        br = self.get_browser()
        for link in markList:
            br.open(self.INDEX + link)

    def cleanup(self):
        """Mark the articles collected by ``parse_index`` as read."""
        self.mark_as_read(self.readList)