"""
Pocket Calibre Recipe v1.2
"""
__license__ = 'GPL v3'
__copyright__ = '''
2010, Darko Miletic
2011, Przemyslaw Kryger
2012, tBunnyMan
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Pocket(BasicNewsRecipe):
    """Recipe that downloads a getpocket.com unread list.

    Pages of the unread list are walked from the oldest page to the newest,
    at most ``max_articles_per_feed`` articles are collected in total, and
    (when ``mark_as_read_after_dl`` is still true at cleanup time) every
    collected article is marked as read on the site.
    """

    title = 'Pocket'
    __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
    # Built from adjacent literals so that source indentation does not leak
    # into the runtime string (the wording itself is unchanged).
    description = (
        'Personalized news feeds. Go to getpocket.com to setup up '
        'your news. This version displays pages of articles from '
        'oldest to newest, with max & minimum counts, and marks articles '
        'read after downloading.'
    )
    publisher = 'getpocket.com'
    category = 'news, custom'
    oldest_article = 7
    # Used by parse_index() as a cap on the TOTAL number of articles taken
    # across all pages, not per page.
    max_articles_per_feed = 50
    # Below this many retrieved articles the download is aborted and nothing
    # is marked as read.
    minimum_articles = 10
    mark_as_read_after_dl = True
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    INDEX = u'http://getpocket.com'
    LOGIN = INDEX + u'/l'
    # Kept as a class attribute for backward compatibility; parse_index()
    # rebinds a fresh per-instance list so runs never share state.
    readList = []

    def get_browser(self):
        """Return a browser, logged in when a username is configured."""
        # Pass ``self`` explicitly: the base implementation is an instance
        # method, so the bare ``BasicNewsRecipe.get_browser()`` call fails.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def get_feeds(self):
        """Return ``(title, url)`` pairs for every unread page, last page first.

        Starting at ``/unread/1`` the active "next" link is followed until it
        disappears. The resulting list is reversed so that the last page
        discovered comes first.
        """
        self.report_progress(0, 'Fetching list of pages...')
        lfeeds = []
        page = 1
        feedurl = self.INDEX + u'/unread/1'
        while True:
            lfeeds.append((u'Unread articles, page ' + str(page), feedurl))
            self.report_progress(0, 'Got ' + str(page) + ' pages')
            soup = self.index_to_soup(feedurl)
            next_link = soup.find('a', attrs={'id': 'next', 'class': 'active'})
            if next_link is None:
                break
            feedurl = self.INDEX + next_link['href']
            page += 1
        # Equivalent to inserting every page at index 0, without the
        # quadratic shifting.
        lfeeds.reverse()
        return lfeeds

    def parse_index(self):
        """Build the ``[(feed title, [article dict, ...]), ...]`` index.

        Side effects: ``self.readList`` is reset and filled with the
        "mark as read" links of the collected articles. When fewer than
        ``minimum_articles`` articles were collected,
        ``mark_as_read_after_dl`` is switched off and the recipe is aborted
        (or, if ``abort_recipe_processing`` is unavailable, an empty index
        is returned).
        """
        # Fresh list per run; see the note on the class attribute.
        self.readList = []
        totalfeeds = []
        remaining = self.max_articles_per_feed
        grabbed = 0

        for feedtitle, feedurl in self.get_feeds():
            if remaining < 1:
                break
            label = feedtitle if feedtitle else feedurl
            self.report_progress(0, 'Fetching feed %s...' % label)
            soup = self.index_to_soup(feedurl)
            listing = soup.find('ul', attrs={'id': 'list'})
            if listing is None:
                # Not inside an ``except`` block, so log.exception() would
                # only append a meaningless "None" traceback.
                self.log.warn("Page %s skipped: invalid HTML" % label)
                continue

            articles = []
            # Items are reversed so they are processed in the opposite order
            # to how the page lists them.
            for item in reversed(listing.findAll('li')):
                if remaining < 1:
                    break
                atag = item.find('a', attrs={'class': 'text'})
                href = atag.get('href') if atag is not None else None
                if href is None:
                    # Not an article entry; it must not use up the budget.
                    continue
                articles.append({
                    'title': self.tag_to_string(item.div),
                    'date': strftime(self.timefmt),
                    'url': self.INDEX + href,
                    'description': '',
                })
                remaining -= 1
                grabbed += 1
                # The check link may be absent; in that case the article is
                # still downloaded, it just cannot be marked as read.
                check = item.find('a', attrs={'class': 'check'})
                read_link = check.get('href') if check is not None else None
                if read_link is not None:
                    self.readList.append(read_link)
            totalfeeds.append((feedtitle, articles))

        if grabbed < self.minimum_articles:
            # Too little content: leave everything unread on the site.
            self.mark_as_read_after_dl = False
            message = ("Only %d articles retrieved, minimum_articles not reached"
                       % grabbed)
            if hasattr(self, 'abort_recipe_processing'):
                self.abort_recipe_processing(message)
            else:
                self.log.error(message)
                return []
        return totalfeeds

    def mark_as_read(self, markList):
        """Open each relative link in *markList* to mark its article read."""
        br = self.get_browser()
        for link in markList:
            url = self.INDEX + link
            self.log.info('Marking read: ', url)
            response = br.open(url)
            self.log.info(response.info())

    def cleanup(self):
        """Mark the collected articles as read, unless that was disabled."""
        if self.mark_as_read_after_dl:
            self.mark_as_read(self.readList)

    def default_cover(self, cover_file):
        '''
        Create a generic cover for recipes that don't have a cover
        This override adds time to the cover

        Writes the image data to *cover_file* and returns True on success;
        on any failure the error is logged and False is returned.
        '''
        try:
            from calibre.ebooks import calibre_cover
            title = self.title
            # Byte-string titles are decoded; text titles are used as is.
            if isinstance(title, bytes):
                title = title.decode('utf-8', 'replace')
            date = strftime(self.timefmt)
            timestamp = strftime('[%I:%M %p]')
            img_data = calibre_cover(title, date, timestamp)
            cover_file.write(img_data)
            cover_file.flush()
        except Exception:
            self.log.exception('Failed to generate default cover')
            return False
        return True