# calibre/recipes/kosmonauta_pl.recipe

# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe
class Kosmonauta(BasicNewsRecipe):
    """Calibre news recipe for Kosmonauta.net.

    Kosmonauta.net is a Polish-language portal about space missions and
    space research.  Articles are taken from the site's Atom feed and
    downloaded through the site's print view.
    """

    # --- Recipe metadata ---
    title = u'Kosmonauta.net'
    __author__ = 'fenuks'
    description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
    category = 'astronomy'
    language = 'pl'
    cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'

    # Site root; used by preprocess_html() to resolve relative links.
    INDEX = 'http://www.kosmonauta.net'

    # --- Download limits ---
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Styling / clean-up ---
    extra_css = '.thumbnail {float:left;margin-right:5px;}'
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    # Keep only the article container, then drop toolbars, tags, navigation,
    # the Disqus comment form and the previous/next page images.
    keep_only_tags = [dict(name='div', attrs={'class':'item-page'})]
    remove_tags = [dict(attrs={'class':['article-tools clearfix', 'cedtag', 'nav clearfix', 'jwDisqusForm']}), dict(attrs={'alt':['Poprzednia strona', 'Następna strona']})]
    remove_tags_after = dict(name='div', attrs={'class':'cedtag'})

    feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/?format=feed&type=atom')]

    def print_version(self, url):
        """Return the URL of the print view for the article at *url*.

        The print view is selected by appending a fixed query string.
        NOTE(review): assumes *url* carries no query string of its own --
        confirm against the URLs delivered by the feed.
        """
        return url + '?tmpl=component&print=1&layout=default&page='

    def preprocess_html(self, soup):
        """Rewrite relative ``<a href>`` targets as absolute URLs.

        Links whose ``href`` already starts with ``http`` and links that only
        point at a fragment of the current document (``#...``) are left
        untouched.  Everything else is resolved against ``INDEX``.

        :param soup: parsed article document.
        :return: the same *soup* object, modified in place.
        """
        # Local import keeps the recipe self-contained and works on both
        # Python 3 and Python 2 interpreters.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        # The trailing slash makes 'page.html' resolve to '<INDEX>/page.html'
        # rather than being glued directly onto the host name.
        base = self.INDEX + '/'
        for a in soup.findAll(name='a'):
            # .get() returns None for anchors without an href attribute.
            href = a.get('href')
            if href is None or href.startswith(('http', '#')):
                continue
            # urljoin leaves URLs with their own scheme (mailto:, javascript:)
            # unchanged and handles protocol-relative '//host/...' links.
            a['href'] = urljoin(base, href)
        return soup