from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1299694372(BasicNewsRecipe): title = u'Klipme' __author__ = 'Ken Sun' publisher = 'Klip.me' category = 'info, custom, Klip.me' oldest_article = 365 max_articles_per_feed = 100 no_stylesheets = True remove_javascript = True remove_tags = [ dict(name='div', attrs={'id': 'text_controls_toggle'}), dict(name='script'), dict(name='div', attrs={ 'id': 'text_controls'}), dict(name='div', attrs={'id': 'editing_controls'}), dict(name='div', attrs={'class': 'bar bottom'}) ] use_embedded_content = False needs_subscription = True INDEX = u'http://www.klip.me' LOGIN = INDEX + u'/fav/signin?callback=/fav' feeds = [ (u'Klip.me unread', u'http://www.klip.me/fav'), (u'Klip.me started', u'http://www.klip.me/fav?s=starred') ] def get_browser(self): br = BasicNewsRecipe.get_browser(self) if self.username is not None: br.open(self.LOGIN) br.select_form(nr=0) br['Email'] = self.username if self.password is not None: br['Passwd'] = self.password br.submit() return br def parse_index(self): totalfeeds = [] lfeeds = self.get_feeds() for feedobj in lfeeds: feedtitle, feedurl = feedobj self.report_progress(0, 'Fetching feed' + ' %s...' % (feedtitle if feedtitle else feedurl)) articles = [] soup = self.index_to_soup(feedurl) for item in soup.findAll('table', attrs={'class': ['item', 'item new']}): atag = item.a if atag and atag.has_key('href'): # noqa url = atag['href'] articles.append({ 'url': url }) totalfeeds.append((feedtitle, articles)) return totalfeeds def print_version(self, url): return 'http://www.klip.me' + url def populate_article_metadata(self, article, soup, first): article.title = soup.find('title').contents[0].strip() def postprocess_html(self, soup, first_fetch): for link_tag in soup.findAll(attrs={"id": "story"}): link_tag.insert( 0, '