# calibre/recipes/wallstreetro.recipe

# -*- coding: utf-8 -*-
#!/usr/bin/env  python

__license__   = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
wall-street.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class WallStreetRo(BasicNewsRecipe):
    """News recipe that downloads articles from the wall-street.ro RSS feed."""

    # --- Publication metadata -------------------------------------------
    title = u'Wall Street'
    __author__ = u'Silviu Cotoar\u0103'
    description = ''
    publisher = 'Wall Street'
    category = 'Ziare'
    language = 'ro'
    cover_url = 'http://img.wall-street.ro/images/WS_new_logo.jpg'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata forwarded to the conversion step; the values mirror the
    # class attributes defined above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Page clean-up rules --------------------------------------------
    # Tag specifications for the article header and article text blocks.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article_header'}),
        dict(name='div', attrs={'class': 'article_text'}),
    ]

    # Tag specifications for the breadcrumbs, user toolbox, comment counter
    # and left column elements that should be stripped.
    remove_tags = [
        dict(name='p', attrs={'class': ['page_breadcrumbs']}),
        dict(name='div', attrs={'id': ['article_user_toolbox']}),
        dict(name='p', attrs={'class': ['comments_count_container']}),
        dict(name='div', attrs={'class': ['article_left_column']}),
    ]

    # Marker element: the 'clearfloat' div.
    remove_tags_after = [
        dict(name='div', attrs={'class': 'clearfloat'}),
    ]

    # --- Feed sources ---------------------------------------------------
    # Each entry is a (section title, feed URL) pair.
    feeds = [
        (u'Feeds', u'http://img.wall-street.ro/rssfeeds/wall-street.xml'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``self.adeify_images``.

        ``adeify_images`` is not defined in this class; it is presumably
        inherited from ``BasicNewsRecipe``.
        """
        adjusted_soup = self.adeify_images(soup)
        return adjusted_soup