# calibre/recipes/readitlater.recipe

__license__   = 'GPL v3'
__copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com>
'''
'''
readitlaterlist.com
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Readitlater(BasicNewsRecipe):
    """Recipe that collects the unread articles of a Read It Later account.

    The unread list is paginated on the site; every page becomes one feed
    (see ``get_feeds``) and every list entry with a text-view link becomes
    one article (see ``parse_index``).
    """

    title                 = 'Read It Later'
    __author__            = 'Darko Miletic, Przemyslaw Kryger'
    description           = '''Personalized news feeds. Go to readitlaterlist.com to
                               setup up your news. Fill in your account
                               username, and optionally you can add password.'''
    publisher             = 'readitlater.com'
    category              = 'news, custom'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    needs_subscription    = True
    # Site root; page and article links found in the HTML are appended to it.
    INDEX                 = u'http://readitlaterlist.com'
    # Page holding the login form.
    LOGIN                 = INDEX + u'/l'

    def get_browser(self):
        """Return a browser, logged in when a username has been configured.

        The first form on the login page is filled with the username
        (field ``feed_id``) and, if one was supplied, the password.
        """
        # The base implementation is called through the class, so ``self``
        # has to be passed explicitly; omitting it raises a TypeError.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def get_feeds(self):
        """Return ``(title, url)`` pairs, one per page of unread articles.

        Pages are discovered by following the active "next" link, starting
        at ``/unread/1``. In test mode at most two pages are returned.
        """
        self.report_progress(0, ('Fetching list of feeds...'))
        lfeeds = []
        feedurl = self.INDEX + u'/unread/1'
        while True:
            page = len(lfeeds) + 1
            title = u'Unread articles, page ' + str(page)
            lfeeds.append((title, feedurl))
            self.report_progress(0, ('Got ') + str(page) + (' feeds'))
            # Test runs only use two feeds, so stop before downloading
            # further pages just to look for their "next" link.
            if self.test and len(lfeeds) >= 2:
                break
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('a', attrs={'id': 'next', 'class': 'active'})
            href = ritem.get('href') if ritem is not None else None
            if not href:
                # No active "next" link (or one without a target): last page.
                break
            feedurl = self.INDEX + href
        return lfeeds

    def parse_index(self):
        """Build the list of ``(feed title, articles)`` pairs for all pages.

        Each article is a dict with the keys ``title``, ``date``, ``url``
        and ``description``. A page without the ``<ul id="list">`` element
        is logged and skipped rather than aborting the whole download.
        """
        totalfeeds = []
        # Same stamp for every article; computed once instead of per item.
        date = strftime(self.timefmt)
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('ul', attrs={'id': 'list'})
            if ritem is None:
                self.log.warn('No article list found on %s, skipping' % feedurl)
                continue
            articles = []
            for item in ritem.findAll('li'):
                atag = item.find('a', attrs={'class': 'text'})
                # Tag.get() replaces the deprecated has_key(); entries with
                # a missing or empty link are ignored.
                href = atag.get('href') if atag else None
                if not href:
                    continue
                articles.append({
                    'title': self.tag_to_string(item.div),
                    'date': date,
                    'url': self.INDEX + href,
                    'description': '',
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds