# Calibre recipe for Instapaper.com (Stable version) # # Homepage: http://khromov.wordpress.com/projects/instapaper-calibre-recipe/ # Code Repository: https://bitbucket.org/khromov/calibre-instapaper from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1299694372(BasicNewsRecipe): title = u'Instapaper' __author__ = 'Darko Miletic, Stanislav Khromov, Jim Ramsay' publisher = 'Instapaper.com' category = 'info, custom, Instapaper' oldest_article = 365 max_articles_per_feed = 100 no_stylesheets = True remove_javascript = True remove_tags = [ dict(name='div', attrs={'id':'text_controls_toggle'}) ,dict(name='script') ,dict(name='div', attrs={'id':'text_controls'}) ,dict(name='div', attrs={'id':'editing_controls'}) ,dict(name='div', attrs={'class':'bar bottom'}) ,dict(name='div', attrs={'id':'controlbar_container'}) ,dict(name='div', attrs={'id':'footer'}) ] use_embedded_content = False needs_subscription = True INDEX = u'http://www.instapaper.com' LOGIN = INDEX + u'/user/login' feeds = [ (u'Instapaper Unread', u'http://www.instapaper.com/u'), (u'Instapaper Starred', u'http://www.instapaper.com/starred') ] #Adds the title tag to the body of the recipe. Use this if your articles miss headings. add_title_tag = False; def get_browser(self): br = BasicNewsRecipe.get_browser() if self.username is not None: br.open(self.LOGIN) br.select_form(nr=0) br['username'] = self.username if self.password is not None: br['password'] = self.password br.submit() return br def parse_index(self): totalfeeds = [] lfeeds = self.get_feeds() for feedobj in lfeeds: feedtitle, feedurl = feedobj self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl)) articles = [] soup = self.index_to_soup(feedurl) for item in soup.findAll('div', attrs={'class':'cornerControls'}): #description = self.tag_to_string(item.div) atag = item.a if atag and atag.has_key('href'): url = atag['href'] articles.append({ 'url' :url }) totalfeeds.append((feedtitle, articles)) return totalfeeds def print_version(self, url): return 'http://www.instapaper.com' + url def populate_article_metadata(self, article, soup, first): article.title = soup.find('title').contents[0].strip() def postprocess_html(self, soup, first_fetch): #adds the title to each story, as it is not always included if self.add_title_tag: for link_tag in soup.findAll(attrs={"id" : "story"}): link_tag.insert(0,'