__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura '
'''
paperli
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime


class paperli_topics(BasicNewsRecipe):
    """Recipe that builds one feed per topic listed on a paper.li tag page.

    The tag index page (``index``) is scanned for topic links; each topic
    page is then scanned for its top stories.
    """

    # Customize this recipe and change paperli_tag and title below to
    # download news on your favorite tag
    paperli_tag = 'climate'
    title = u'The #climate Daily - paperli'
    # -------------------------------------------------------------
    __author__ = 'Hiroshi Miura'
    oldest_article = 7
    max_articles_per_feed = 100
    description = 'paper.li page about ' + paperli_tag
    publisher = 'paper.li'
    category = 'paper.li'
    language = 'en'
    encoding = 'utf-8'
    remove_javascript = True
    masthead_title = u'The ' + paperli_tag + ' Daily'
    timefmt = '[%y/%m/%d]'
    base_url = 'http://paper.li'
    # Landing page for the configured tag; topic links are read from it.
    index = base_url + '/tag/' + paperli_tag

    def parse_index(self):
        """Collect the articles of every topic found on the tag index page.

        Returns:
            A list of ``(topic_title, articles)`` tuples, where ``articles``
            is a list of dicts with the keys ``title``, ``date``, ``url`` and
            ``description``. The list is empty when the topic navigation
            block cannot be found on the index page.
        """
        # --- get topics ---
        soup = self.index_to_soup(self.index)
        topics_nav = soup.find('div', attrs={'class': 'paper-nav-bottom'})
        if topics_nav is None:
            # The page layout changed or the tag has no topics; calling
            # findAll on None would raise an opaque AttributeError.
            self.log.warn('paper.li: topic navigation not found on ' + self.index)
            return []

        topics = []
        for item in topics_nav.findAll('li', attrs={'class': ""}):
            link = item.find('a', href=True)
            if link is None:
                # List entry without a link: nothing to follow.
                continue
            topics.append({'title': link.string, 'url': link['href']})

        # --- get feeds ---
        # The date stamp is identical for every article, so compute it once.
        today = strftime(self.timefmt)
        feeds = []
        for topic in topics:
            # Topic hrefs are appended to base_url as-is (site-relative paths).
            topic_soup = self.index_to_soup(
                ''.join([self.base_url, topic['url']]))
            newsarticles = []
            for story in topic_soup.findAll('div', attrs={'class': 'yui-u'}):
                link = story.find('a', href=True, attrs={'class': 'ts'})
                if link is None:
                    continue
                summary = story.find('div', text=True, attrs={'class': 'text'})
                newsarticles.append({
                    'title': link.string,
                    'date': today,
                    'url': link['href'],
                    # A story without a summary gets an empty description
                    # rather than aborting the whole download.
                    'description': summary.string if summary is not None else '',
                })
            feeds.append((topic['title'], newsarticles))
        return feeds