# calibre/recipes/paperli_topic.recipe

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
paperli
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime


class paperli_topics(BasicNewsRecipe):
    # Recipe that turns the paper.li page of a single tag into one feed per
    # topic section listed in that page's bottom navigation.
    #
    # Customize this recipe and change paperli_tag and title below to
    # download news on your favorite tag
    paperli_tag = 'climate'
    title = u'The #climate Daily - paperli'
    __author__ = 'Hiroshi Miura'
    oldest_article = 7
    max_articles_per_feed = 100
    description = 'paper.li page about ' + paperli_tag
    publisher = 'paper.li'
    category = 'paper.li'
    language = 'en'
    encoding = 'utf-8'
    remove_javascript = True
    masthead_title = u'The ' + paperli_tag + ' Daily'
    timefmt = '[%y/%m/%d]'
    base_url = 'http://paper.li'
    # Tag landing page; parse_index reads the topic links from it.
    index = base_url + '/tag/' + paperli_tag

    def parse_index(self):
        '''
        Build the list of feeds from the tag page.

        First collects the topic links found in the 'paper-nav-bottom'
        block of the index page, then fetches every topic page and gathers
        the stories linked with an <a class="ts"> anchor.

        Returns a list of (topic title, articles) tuples. Each article is a
        dict with the keys 'title', 'date', 'url' and 'description'. An
        empty list is returned when the topic navigation block is missing.
        '''
        # get topics
        topics = []
        soup = self.index_to_soup(self.index)
        topics_lists = soup.find('div', attrs={'class': 'paper-nav-bottom'})
        if topics_lists is None:
            # Without the navigation block there is nothing to iterate over;
            # report it instead of failing with an AttributeError on None.
            self.log.warn('No topic navigation found at ' + self.index)
            return []
        for item in topics_lists.findAll('li', attrs={'class': ""}):
            itema = item.find('a', href=True)
            if itema is None:
                # List entry without a link cannot be followed; skip it.
                continue
            topics.append({'title': itema.string, 'url': itema['href']})

        # Every article gets the same download-time stamp, so format it once.
        today = strftime(self.timefmt)

        # get feeds
        feeds = []
        for topic in topics:
            newsarticles = []
            soup = self.index_to_soup(''.join([self.base_url, topic['url']]))
            topstories = soup.findAll('div', attrs={'class': 'yui-u'})
            for itt in topstories:
                itema = itt.find('a', href=True, attrs={'class': 'ts'})
                if itema is None:
                    # Layout column that holds no story link.
                    continue
                itemd = itt.find('div', text=True, attrs={'class': 'text'})
                # A story may come without a text blurb; keep the article
                # and fall back to an empty description.
                summary = itemd.string if itemd is not None else ''
                newsarticles.append({
                    'title': itema.string,
                    'date': today,
                    'url': itema['href'],
                    'description': summary,
                })
            feeds.append((topic['title'], newsarticles))
        return feeds