# calibre/recipes/unica.recipe

# -*- coding: utf-8 -*-
#!/usr/bin/env  python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
unica.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Unica(BasicNewsRecipe):
    """Recipe configuration for the unica.ro RSS feed.

    The class body is declarative: it only sets attributes that the
    ``BasicNewsRecipe`` base class (imported from calibre, not shown in
    this file) interprets, and overrides ``preprocess_html``.
    """

    # ---- Publication metadata -------------------------------------------
    title = u'Unica'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Asa cum esti tu'
    publisher = 'Unica'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    cover_url = 'http://www.unica.ro/fileadmin/images/logo.gif'

    # ---- Download settings ----------------------------------------------
    # NOTE(review): units/semantics of these knobs are defined by
    # BasicNewsRecipe -- consult the calibre recipe API docs to confirm.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Reuses the metadata values declared above, so those attributes must
    # be assigned before this dictionary is built.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # ---- Page clean-up selectors ----------------------------------------
    # Each entry is a tag name plus an attribute filter.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'sticky'}},
        {'name': 'p', 'attrs': {'class': 'bodytext'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['top-links']}},
        {'name': 'div', 'attrs': {'id': ['autor_name']}},
        {'name': 'div', 'attrs': {'class': ['box-r']}},
        {'name': 'div', 'attrs': {'class': ['category']}},
        {'name': 'div', 'attrs': {'class': ['data']}},
    ]

    remove_tags_after = [
        {'name': 'ul', 'attrs': {'class': 'pager'}},
    ]

    # ---- Feed sources: (title, url) pairs --------------------------------
    feeds = [
        (u'Feeds', u'http://www.unica.ro/rss.html'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``self.adeify_images``.

        ``adeify_images`` is not defined in this class, so it is presumably
        inherited from ``BasicNewsRecipe``.
        """
        adjusted_soup = self.adeify_images(soup)
        return adjusted_soup