# Calibre recipe for Instapaper.com (Stable version)
#
# Homepage: http://khromov.wordpress.com/projects/instapaper-calibre-recipe/
# Source: https://github.com/kovidgoyal/calibre/blob/master/recipes/instapaper.recipe

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    """Recipe that collects the articles listed on a user's Instapaper pages.

    The recipe logs in with the configured username/password (see
    ``get_browser``) and builds its article list by scraping the pages named
    in ``feeds`` (see ``parse_index``).
    """

    title = u'Instapaper'
    __author__ = 'Darko Miletic, Stanislav Khromov, Jim Ramsay'
    publisher = 'Instapaper.com'
    category = 'info, custom, Instapaper'
    oldest_article = 365
    max_articles_per_feed = 100
    # Optional setting, disabled by default:
    # reverse_article_order = True
    no_stylesheets = False
    extra_css = 'q { font-style: italic; } .size3mode { color: black; }'
    remove_javascript = True

    # Elements removed from every downloaded page. Judging by their
    # ids/classes these are page controls, toolbars, popovers and the footer
    # rather than article content.
    remove_tags = [
        dict(name='div', attrs={'id': 'text_controls_toggle'}),
        dict(name='script'),
        dict(name='div', attrs={'id': 'text_controls'}),
        dict(name='section', attrs={'class': 'primary_bar'}),
        dict(name='div', attrs={'class': 'modal_group'}),
        dict(name='div', attrs={'id': 'editing_controls'}),
        dict(name='div', attrs={'class': 'modal_name'}),
        dict(name='div', attrs={'class': 'highlight_popover'}),
        dict(name='div', attrs={'class': 'bar bottom'}),
        dict(name='div', attrs={'class': 'evernote_confirm'}),
        dict(name='div', attrs={'id': 'controlbar_container'}),
        dict(name='div', attrs={'id': 'footer'}),
        dict(name='div', attrs={'id': 'speedRead'}),
        dict(name='label'),
    ]

    use_embedded_content = False
    # The credentials are consumed by get_browser() below.
    needs_subscription = True

    # Site root; also used as the prefix for the article links found by
    # parse_index().
    INDEX = u'https://www.instapaper.com'
    LOGIN = INDEX + u'/user/login'

    # (title, url) pairs of the listing pages to scrape.
    feeds = [
        (u'Instapaper Unread', u'https://www.instapaper.com/u')
        # (u'Instapaper Starred', u'https://www.instapaper.com/starred')
    ]

    def get_browser(self):
        """Return a browser, logged in to Instapaper when a username is set.

        When ``self.username`` is not None, the login page is opened and its
        first form is filled in and submitted. The password field is only
        filled in when ``self.password`` is not None; the form is submitted
        either way.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['username'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the article list by scraping each configured listing page.

        Returns:
            A list of ``(feed_title, articles)`` tuples, where ``articles``
            is a list of dicts with the keys ``'url'`` and ``'title'``, one
            per ``<a class="article_title">`` element found on the page.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(
                0, 'Fetching feed %s...' % (feedtitle or feedurl))
            soup = self.index_to_soup(feedurl)
            articles = []
            for link in soup.findAll('a', attrs={'class': 'article_title'}):
                href = link.get('href')
                if not href:
                    # Without a target there is nothing to download; skip the
                    # entry instead of aborting the whole run on a KeyError.
                    continue
                articles.append({
                    # The concatenation assumes site-relative hrefs, as the
                    # original code did.
                    'url': self.INDEX + href,
                    # Fall back to the link text when the title attribute is
                    # missing or empty.
                    'title': link.get('title') or self.tag_to_string(link),
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds