# calibre/recipes/emuzica_pl.recipe

# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe


class eMuzyka(BasicNewsRecipe):
    """News recipe for emuzyka.pl, a Polish music site.

    Declares two RSS feeds (news and reviews), restricts the kept markup to
    the article containers listed in ``keep_only_tags``, and rewrites
    non-absolute links so they are prefixed with the site address.
    """

    # Recipe metadata.
    title = u'eMuzyka'
    __author__ = 'fenuks'
    description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
    category = 'music'
    language = 'pl'

    # Site address; also used as the prefix for non-absolute links in
    # preprocess_html().
    index = 'http://www.emuzyka.pl'
    cover_url = 'http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'

    # Download / clean-up settings consumed by the BasicNewsRecipe base class.
    no_stylesheets = True
    oldest_article = 7
    remove_empty_feeds = True
    max_articles_per_feed = 100
    remove_attributes = ['style']

    # Tag selectors: which elements to keep and which to strip.
    keep_only_tags = [dict(name='div', attrs={'id': 'news_container'}), dict(
        name='h3'), dict(name='div', attrs={'class': 'review_text'})]
    remove_tags = [dict(name='span', attrs={'id': 'date'})]

    # (feed title, feed URL) pairs.
    feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'),
             (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]

    def preprocess_html(self, soup):
        """Prefix non-absolute ``<a href>`` values with ``self.index``.

        An href is left untouched when it contains ``http://`` or
        ``https://``; every other href present on an anchor is replaced by
        ``self.index + href``. Anchors without an ``href`` attribute are
        skipped.

        :param soup: parsed document whose ``<a>`` tags are rewritten in place
        :return: the same ``soup`` object
        """
        for a in soup('a'):
            # Tag.get() replaces the Python 2-era has_key(), which newer
            # Beautiful Soup releases no longer provide.
            href = a.get('href')
            if href is None:
                continue
            if 'http://' not in href and 'https://' not in href:
                a['href'] = self.index + href
        return soup