__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura '
'''
paperli
'''

try:
    from urllib.parse import urljoin
except ImportError:  # Python 2 interpreter
    from urlparse import urljoin

from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime


class paperli_topics(BasicNewsRecipe):
    # Customize this recipe and change paperli_tag and title below to
    # download news on your favorite tag
    paperli_tag = 'climate'
    title = u'The #climate Daily - paperli'

    __author__ = 'Hiroshi Miura'
    oldest_article = 7
    max_articles_per_feed = 100
    description = 'paper.li page about ' + paperli_tag
    publisher = 'paper.li'
    category = 'paper.li'
    language = 'en'
    encoding = 'utf-8'
    remove_javascript = True
    masthead_title = u'The ' + paperli_tag + ' Daily'
    timefmt = '[%y/%m/%d]'
    base_url = 'http://paper.li'
    # Landing page for the tag; the topic links are scraped from here.
    index = base_url + '/tag/' + paperli_tag

    def parse_index(self):
        """Build the feed list, one feed per topic linked from the index page.

        Returns:
            A list of ``(topic_title, articles)`` tuples, where ``articles``
            is a list of dicts with the keys ``title``, ``date``, ``url`` and
            ``description``. The list is empty when no topics are found.
        """
        return [
            (topic['title'], self._get_articles(topic['url']))
            for topic in self._get_topics()
        ]

    def _link_text(self, link):
        """Return the text of an anchor tag, even if it has nested markup."""
        # ``.string`` is None when the tag has more than one child; fall
        # back to the recipe helper so the title is never None.
        return link.string or self.tag_to_string(link)

    def _get_topics(self):
        """Collect ``{'title', 'url'}`` dicts for the topics on the index page."""
        soup = self.index_to_soup(self.index)
        nav = soup.find('div', attrs={'class': 'paper-nav-bottom'})
        if nav is None:
            # Page layout differs from what this recipe expects; report it
            # instead of failing with an AttributeError on None.
            self.log.warning(
                'paperli: no topic navigation found on ' + self.index)
            return []

        topics = []
        for item in nav.findAll('li', attrs={'class': ""}):
            link = item.find('a', href=True)
            if link is None:
                # List entry without a link: nothing to follow.
                continue
            topics.append({'title': self._link_text(link),
                           'url': link['href']})
        return topics

    def _get_articles(self, topic_url):
        """Collect the article dicts listed on a single topic page.

        Args:
            topic_url: The href of the topic as found on the index page;
                it is resolved against ``base_url``.
        """
        # urljoin copes with both site-relative and absolute hrefs, unlike
        # plain string concatenation.
        soup = self.index_to_soup(urljoin(self.base_url, topic_url))

        articles = []
        for story in soup.findAll('div', attrs={'class': 'yui-u'}):
            link = story.find('a', href=True, attrs={'class': 'ts'})
            if link is None:
                continue
            summary = story.find('div', text=True, attrs={'class': 'text'})
            articles.append({
                'title': self._link_text(link),
                'date': strftime(self.timefmt),
                'url': link['href'],
                # A story may have no summary block; use an empty
                # description rather than crashing.
                'description': summary.string if summary is not None else ''
            })
        return articles