# calibre/recipes/readitlater.recipe

"""
Pocket Calibre Recipe v1.2
"""
__license__   = 'GPL v3'
__copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com>
2012, tBunnyMan <Wag That Tail At Me dot com>
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Pocket(BasicNewsRecipe):
    """Calibre recipe that downloads the unread articles of a Pocket account.

    The unread list is walked page by page, the pages are processed in
    reverse discovery order and the items on each page in reverse order
    (oldest first, per the recipe description).  At most
    ``max_articles_per_feed`` articles are collected in total, and the run
    is aborted when fewer than ``minimum_articles`` were found.  After the
    download, the collected articles are marked as read on the site when
    ``mark_as_read_after_dl`` is still true.
    """

    title                 = 'Pocket'
    __author__            = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
    description           = '''Personalized news feeds. Go to getpocket.com to setup up \
                            your news. This version displays pages of articles from \
                            oldest to newest, with max & minimum counts, and marks articles \
                            read after downloading.'''
    publisher             = 'getpocket.com'
    category              = 'news, custom'
    oldest_article        = 7
    # Used by parse_index() as the total article budget across all pages.
    max_articles_per_feed = 50
    # parse_index() aborts the recipe when fewer articles than this are found.
    minimum_articles      = 10
    # When true, cleanup() requests every collected "mark read" link.
    mark_as_read_after_dl = True
    no_stylesheets        = True
    use_embedded_content  = False
    needs_subscription    = True
    # NOTE(review): plain http -- the login page is opened from this base URL;
    # confirm whether credentials end up being sent unencrypted.
    INDEX                 = u'http://getpocket.com'
    LOGIN                 = INDEX + u'/l'
    # "Mark read" hrefs of the collected articles.  parse_index() rebinds this
    # to a fresh per-instance list; the class-level default is kept only for
    # backward compatibility.
    readList              = []

    def get_browser(self):
        """Return a browser, logged in to Pocket when a username is set.

        The first form on the login page is filled with ``self.username``
        (field ``feed_id``) and, when present, ``self.password``.
        """
        # get_browser is called through the base class, so the recipe
        # instance has to be passed explicitly.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def get_feeds(self):
        """Return ``(title, url)`` tuples, one per page of the unread list.

        Pages are discovered by following the active "next" link starting at
        ``/unread/1``.  The returned list is in reverse discovery order, so
        the last page found comes first.
        """
        self.report_progress(0, 'Fetching list of pages...')
        pages = []
        page_no = 1
        feedurl = self.INDEX + u'/unread/1'
        while True:
            pages.append((u'Unread articles, page ' + str(page_no), feedurl))
            self.report_progress(0, 'Got %d pages' % page_no)
            page_no += 1
            soup = self.index_to_soup(feedurl)
            next_link = soup.find('a', attrs={'id':'next', 'class':'active'})
            if next_link is None:
                break
            feedurl = self.INDEX + next_link['href']
        pages.reverse()
        return pages

    def parse_index(self):
        """Build the feed list: ``[(feed_title, [article_dict, ...]), ...]``.

        Every article dict has the keys ``title``, ``date``, ``url`` and
        ``description``.  Collection stops once ``max_articles_per_feed``
        articles were gathered.  When fewer than ``minimum_articles`` were
        gathered, marking-as-read is disabled and the recipe is aborted via
        ``abort_recipe_processing`` if available; otherwise the problem is
        logged and an empty list is returned.
        """
        # Fresh instance-level list: avoids sharing/accumulating hrefs through
        # the mutable class attribute.
        self.readList = []
        totalfeeds = []
        remaining = self.max_articles_per_feed
        grabbed = 0
        for feedtitle, feedurl in self.get_feeds():
            if remaining < 1:
                break
            label = feedtitle if feedtitle else feedurl
            self.report_progress(0, 'Fetching feed %s...' % label)
            soup = self.index_to_soup(feedurl)
            listing = soup.find('ul', attrs={'id':'list'})
            if listing is None:
                # No exception is active here, so log a plain error.
                self.log.error("Page %s skipped: invalid HTML" % label)
                continue
            articles = []
            for item in reversed(listing.findAll('li')):
                if remaining < 1:
                    break
                atag = item.find('a', attrs={'class':'text'})
                href = atag.get('href') if atag is not None else None
                if href is None:
                    # Not an article entry; do not charge it to the budget.
                    continue
                articles.append({
                    'title'      : self.tag_to_string(item.div),
                    'date'       : strftime(self.timefmt),
                    'url'        : self.INDEX + href,
                    'description': '',
                })
                remaining -= 1
                grabbed += 1
                check = item.find('a', attrs={'class':'check'})
                read_href = check.get('href') if check is not None else None
                if read_href is None:
                    self.log.warn("No 'mark read' link found for %s" % href)
                else:
                    self.readList.append(read_href)
            totalfeeds.append((feedtitle, articles))
        if grabbed < self.minimum_articles:
            # Too few articles: leave everything unread for the next run.
            self.mark_as_read_after_dl = False
            msg = "Only %d articles retrieved, minimum_articles not reached" % grabbed
            if hasattr(self, 'abort_recipe_processing'):
                self.abort_recipe_processing(msg)
            else:
                self.log.error(msg)
                return []
        return totalfeeds

    def mark_as_read(self, markList):
        """Request every "mark read" link in ``markList`` (site-relative hrefs).

        Best effort: a failure on one link is logged and the remaining links
        are still processed.
        """
        br = self.get_browser()
        for link in markList:
            url = self.INDEX + link
            self.log.info('Marking read: %s' % url)
            try:
                response = br.open(url)
            except Exception:
                self.log.exception('Failed to mark as read: %s' % url)
                continue
            self.log.info(str(response.info()))

    def cleanup(self):
        """Mark the collected articles as read, unless that was disabled."""
        if self.mark_as_read_after_dl:
            self.mark_as_read(self.readList)

    def default_cover(self, cover_file):
        '''
        Create a generic cover for recipes that don't have a cover
        This override adds time to the cover

        Writes the image data to ``cover_file`` and returns True on success;
        on any error the failure is logged and False is returned.
        '''
        try:
            from calibre.ebooks import calibre_cover
            title = self.title
            # Byte-string titles are decoded; text titles are used as they are.
            if isinstance(title, bytes):
                title = title.decode('utf-8', 'replace')
            date = strftime(self.timefmt)
            clock = strftime('[%I:%M %p]')
            img_data = calibre_cover(title, date, clock)
            cover_file.write(img_data)
            cover_file.flush()
        except Exception:
            self.log.exception('Failed to generate default cover')
            return False
        return True